OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
QueryMemoryInitializer Class Reference

#include <QueryMemoryInitializer.h>

+ Collaboration diagram for QueryMemoryInitializer:

Public Member Functions

 QueryMemoryInitializer (const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const bool output_columnar, const bool sort_on_gpu, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, RenderAllocatorMap *render_allocator_map, RenderInfo *render_info, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *gpu_allocator, const Executor *executor)
 
 QueryMemoryInitializer (const TableFunctionExecutionUnit &exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *device_allocator, const Executor *executor)
 
const auto getCountDistinctBitmapPtr () const
 
const auto getCountDistinctHostPtr () const
 
const auto getCountDistinctBitmapBytes () const
 
ResultSet * getResultSet (const size_t index) const
 
std::unique_ptr< ResultSet > getResultSetOwned (const size_t index)
 
void resetResultSet (const size_t index)
 
int64_t getAggInitValForIndex (const size_t index) const
 
const auto getGroupByBuffersPtr ()
 
const auto getGroupByBuffersSize () const
 
const auto getNumBuffers () const
 
void copyGroupByBuffersFromGpu (Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit *ra_exe_unit, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer) const
 

Private Member Functions

void initGroupByBuffer (int64_t *buffer, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool output_columnar, const Executor *executor)
 
void initGroups (const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)
 
void initColumnarGroups (const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
 
void initColumnPerRow (const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const size_t bin, const std::vector< int64_t > &init_vals, const std::vector< ssize_t > &bitmap_sizes)
 
void allocateCountDistinctGpuMem (const QueryMemoryDescriptor &query_mem_desc)
 
std::vector< ssize_t > allocateCountDistinctBuffers (const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
 
int64_t allocateCountDistinctBitmap (const size_t bitmap_byte_sz)
 
int64_t allocateCountDistinctSet ()
 
size_t computeNumberOfBuffers (const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
 
void compactProjectionBuffersCpu (const QueryMemoryDescriptor &query_mem_desc, const size_t projection_count)
 
void compactProjectionBuffersGpu (const QueryMemoryDescriptor &query_mem_desc, Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const size_t projection_count, const int device_id)
 
void applyStreamingTopNOffsetCpu (const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
 
void applyStreamingTopNOffsetGpu (Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit &ra_exe_unit, const unsigned total_thread_count, const int device_id)
 

Private Attributes

const int64_t num_rows_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::vector< std::unique_ptr< ResultSet > > result_sets_
 
std::vector< int64_t > init_agg_vals_
 
const size_t num_buffers_
 
std::vector< int64_t * > group_by_buffers_
 
CUdeviceptr count_distinct_bitmap_mem_
 
size_t count_distinct_bitmap_mem_bytes_
 
int8_t * count_distinct_bitmap_crt_ptr_
 
int8_t * count_distinct_bitmap_host_mem_
 
DeviceAllocator * device_allocator_ {nullptr}
 
std::vector< Data_Namespace::AbstractBuffer * > temporary_buffers_
 

Friends

class Executor
 
class QueryExecutionContext
 

Detailed Description

Definition at line 35 of file QueryMemoryInitializer.h.

Constructor & Destructor Documentation

QueryMemoryInitializer::QueryMemoryInitializer ( const RelAlgExecutionUnit &  ra_exe_unit,
const QueryMemoryDescriptor &  query_mem_desc,
const int  device_id,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const bool  output_columnar,
const bool  sort_on_gpu,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
RenderAllocatorMap *  render_allocator_map,
RenderInfo *  render_info,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
DeviceAllocator *  gpu_allocator,
const Executor *  executor 
)

Definition at line 153 of file QueryMemoryInitializer.cpp.

References anonymous_namespace{QueryMemoryInitializer.cpp}::alloc_group_by_buffer(), allocateCountDistinctBuffers(), allocateCountDistinctGpuMem(), CHECK(), CHECK_GE, anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), RelAlgExecutionUnit::estimator, ResultSet::fixupQueryMemoryDescriptor(), g_max_memory_allocation_size, anonymous_namespace{QueryMemoryInitializer.cpp}::get_col_frag_offsets(), anonymous_namespace{QueryMemoryInitializer.cpp}::get_consistent_frags_sizes(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), GPU, group_by_buffers_, QueryMemoryDescriptor::hasKeylessHash(), initGroupByBuffer(), QueryMemoryDescriptor::interleavedBins(), QueryMemoryDescriptor::isGroupBy(), KernelPerFragment, QueryMemoryDescriptor::lazyInitGroups(), num_buffers_, result_sets_, row_set_mem_owner_, RelAlgExecutionUnit::target_exprs, target_exprs_to_infos(), QueryMemoryDescriptor::threadsShareMemory(), RelAlgExecutionUnit::use_bump_allocator, and RenderInfo::useCudaBuffers().

170  , row_set_mem_owner_(row_set_mem_owner)
171  , init_agg_vals_(executor->plan_state_->init_agg_vals_)
172  , num_buffers_(computeNumberOfBuffers(query_mem_desc, device_type, executor))
177  , device_allocator_(device_allocator) {
178  CHECK(!sort_on_gpu || output_columnar);
179 
180  const auto& consistent_frag_sizes = get_consistent_frags_sizes(frag_offsets);
181  if (consistent_frag_sizes.empty()) {
182  // No fragments in the input, no underlying buffers will be needed.
183  return;
184  }
185  if (!ra_exe_unit.use_bump_allocator) {
186  check_total_bitmap_memory(query_mem_desc);
187  }
188  if (device_type == ExecutorDeviceType::GPU) {
189  allocateCountDistinctGpuMem(query_mem_desc);
190  }
191 
192  if (render_allocator_map || !query_mem_desc.isGroupBy()) {
193  allocateCountDistinctBuffers(query_mem_desc, false, executor);
194  if (render_info && render_info->useCudaBuffers()) {
195  return;
196  }
197  }
198 
199  if (ra_exe_unit.estimator) {
200  return;
201  }
202 
203  const auto thread_count = device_type == ExecutorDeviceType::GPU
204  ? executor->blockSize() * executor->gridSize()
205  : 1;
206 
207  size_t group_buffer_size{0};
208  if (ra_exe_unit.use_bump_allocator) {
209  // For kernel per fragment execution, just allocate a buffer equivalent to the size of
210  // the fragment
211  if (dispatch_mode == ExecutorDispatchMode::KernelPerFragment) {
212  group_buffer_size = num_rows * query_mem_desc.getRowSize();
213  } else {
214  // otherwise, allocate a GPU buffer equivalent to the maximum GPU allocation size
215  group_buffer_size = g_max_memory_allocation_size / query_mem_desc.getRowSize();
216  }
217  } else {
218  group_buffer_size =
219  query_mem_desc.getBufferSizeBytes(ra_exe_unit, thread_count, device_type);
220  }
221  CHECK_GE(group_buffer_size, size_t(0));
222 
223  const auto group_buffers_count = !query_mem_desc.isGroupBy() ? 1 : num_buffers_;
224  int64_t* group_by_buffer_template{nullptr};
225  if (!query_mem_desc.lazyInitGroups(device_type) && group_buffers_count > 1) {
226  group_by_buffer_template =
227  reinterpret_cast<int64_t*>(row_set_mem_owner_->allocate(group_buffer_size));
228  initGroupByBuffer(group_by_buffer_template,
229  ra_exe_unit,
230  query_mem_desc,
231  device_type,
232  output_columnar,
233  executor);
234  }
235 
236  if (query_mem_desc.interleavedBins(device_type)) {
237  CHECK(query_mem_desc.hasKeylessHash());
238  }
239 
240  const auto step = device_type == ExecutorDeviceType::GPU &&
241  query_mem_desc.threadsShareMemory() &&
242  query_mem_desc.isGroupBy()
243  ? executor->blockSize()
244  : size_t(1);
245  const auto index_buffer_qw = device_type == ExecutorDeviceType::GPU && sort_on_gpu &&
246  query_mem_desc.hasKeylessHash()
247  ? query_mem_desc.getEntryCount()
248  : size_t(0);
249  const auto actual_group_buffer_size =
250  group_buffer_size + index_buffer_qw * sizeof(int64_t);
251  CHECK_GE(actual_group_buffer_size, group_buffer_size);
252 
253  for (size_t i = 0; i < group_buffers_count; i += step) {
254  auto group_by_buffer = alloc_group_by_buffer(
255  actual_group_buffer_size, render_allocator_map, row_set_mem_owner_.get());
256  if (!query_mem_desc.lazyInitGroups(device_type)) {
257  if (group_by_buffer_template) {
258  memcpy(group_by_buffer + index_buffer_qw,
259  group_by_buffer_template,
260  group_buffer_size);
261  } else {
262  initGroupByBuffer(group_by_buffer + index_buffer_qw,
263  ra_exe_unit,
264  query_mem_desc,
265  device_type,
266  output_columnar,
267  executor);
268  }
269  }
270  group_by_buffers_.push_back(group_by_buffer);
271  for (size_t j = 1; j < step; ++j) {
272  group_by_buffers_.push_back(nullptr);
273  }
274  const auto column_frag_offsets =
275  get_col_frag_offsets(ra_exe_unit.target_exprs, frag_offsets);
276  const auto column_frag_sizes =
277  get_consistent_frags_sizes(ra_exe_unit.target_exprs, consistent_frag_sizes);
278  result_sets_.emplace_back(
279  new ResultSet(target_exprs_to_infos(ra_exe_unit.target_exprs, query_mem_desc),
280  executor->getColLazyFetchInfo(ra_exe_unit.target_exprs),
281  col_buffers,
282  column_frag_offsets,
283  column_frag_sizes,
284  device_type,
285  device_id,
288  executor));
289  result_sets_.back()->allocateStorage(reinterpret_cast<int8_t*>(group_by_buffer),
290  executor->plan_state_->init_agg_vals_);
291  for (size_t j = 1; j < step; ++j) {
292  result_sets_.emplace_back(nullptr);
293  }
294  }
295 }
std::vector< Analyzer::Expr * > target_exprs
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
std::vector< ssize_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
bool useCudaBuffers() const
Definition: RenderInfo.cpp:69
const int8_t const int64_t * num_rows
DeviceAllocator * device_allocator_
#define CHECK_GE(x, y)
Definition: Logger.h:210
void check_total_bitmap_memory(const QueryMemoryDescriptor &query_mem_desc)
size_t computeNumberOfBuffers(const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
CHECK(cgen_state)
std::vector< int64_t > init_agg_vals_
bool lazyInitGroups(const ExecutorDeviceType) const
size_t g_max_memory_allocation_size
Definition: Execute.cpp:100
const std::shared_ptr< Analyzer::Estimator > estimator
void initGroupByBuffer(int64_t *buffer, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool output_columnar, const Executor *executor)
std::vector< int64_t * > group_by_buffers_
std::vector< int64_t > get_consistent_frags_sizes(const std::vector< std::vector< uint64_t >> &frag_offsets)
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:506
bool interleavedBins(const ExecutorDeviceType) const
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
std::vector< std::unique_ptr< ResultSet > > result_sets_
int64_t * alloc_group_by_buffer(const size_t numBytes, RenderAllocatorMap *render_allocator_map, RowSetMemoryOwner *mem_owner)
void allocateCountDistinctGpuMem(const QueryMemoryDescriptor &query_mem_desc)
std::vector< std::vector< int64_t > > get_col_frag_offsets(const std::vector< Analyzer::Expr * > &target_exprs, const std::vector< std::vector< uint64_t >> &table_frag_offsets)

+ Here is the call graph for this function:

QueryMemoryInitializer::QueryMemoryInitializer ( const TableFunctionExecutionUnit &  exe_unit,
const QueryMemoryDescriptor &  query_mem_desc,
const int  device_id,
const ExecutorDeviceType  device_type,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
DeviceAllocator *  device_allocator,
const Executor *  executor 
)

Definition at line 297 of file QueryMemoryInitializer.cpp.

309  , row_set_mem_owner_(row_set_mem_owner)
const int8_t const int64_t * num_rows
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
std::vector< int64_t > init_agg_vals_
std::vector< Analyzer::Expr * > target_exprs
std::vector< int64_t > init_agg_val_vec(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc)

Member Function Documentation

int64_t QueryMemoryInitializer::allocateCountDistinctBitmap ( const size_t  bitmap_byte_sz)
private

Definition at line 645 of file QueryMemoryInitializer.cpp.

References CHECK(), count_distinct_bitmap_crt_ptr_, count_distinct_bitmap_host_mem_, and row_set_mem_owner_.

Referenced by allocateCountDistinctBuffers(), and initColumnPerRow().

645  {
649  count_distinct_bitmap_crt_ptr_ += bitmap_byte_sz;
650  row_set_mem_owner_->addCountDistinctBuffer(
651  ptr, bitmap_byte_sz, /*physial_buffer=*/false);
652  return reinterpret_cast<int64_t>(ptr);
653  }
654  return reinterpret_cast<int64_t>(
655  row_set_mem_owner_->allocateCountDistinctBuffer(bitmap_byte_sz));
656 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
CHECK(cgen_state)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< ssize_t > QueryMemoryInitializer::allocateCountDistinctBuffers ( const QueryMemoryDescriptor &  query_mem_desc,
const bool  deferred,
const Executor *  executor 
)
private

Definition at line 599 of file QueryMemoryInitializer.cpp.

References agg_col_count, allocateCountDistinctBitmap(), allocateCountDistinctSet(), Bitmap, CHECK(), CHECK_EQ, CHECK_GE, CHECK_LT, g_bigint_count, get_target_info(), QueryMemoryDescriptor::getCountDistinctDescriptor(), QueryMemoryDescriptor::getLogicalSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), QueryMemoryDescriptor::getSlotIndexForSingleSlotCol(), init_agg_vals_, Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, and StdSet.

Referenced by initGroups(), and QueryMemoryInitializer().

602  {
603  const size_t agg_col_count{query_mem_desc.getSlotCount()};
604  std::vector<ssize_t> agg_bitmap_size(deferred ? agg_col_count : 0);
605 
606  CHECK_GE(agg_col_count, executor->plan_state_->target_exprs_.size());
607  for (size_t target_idx = 0; target_idx < executor->plan_state_->target_exprs_.size();
608  ++target_idx) {
609  const auto target_expr = executor->plan_state_->target_exprs_[target_idx];
610  const auto agg_info = get_target_info(target_expr, g_bigint_count);
611  if (is_distinct_target(agg_info)) {
612  CHECK(agg_info.is_agg &&
613  (agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT));
614  CHECK(!agg_info.sql_type.is_varlen());
615 
616  const auto agg_col_idx = query_mem_desc.getSlotIndexForSingleSlotCol(target_idx);
617  CHECK_LT(static_cast<size_t>(agg_col_idx), agg_col_count);
618 
619  CHECK_EQ(static_cast<size_t>(query_mem_desc.getLogicalSlotWidthBytes(agg_col_idx)),
620  sizeof(int64_t));
621  const auto& count_distinct_desc =
622  query_mem_desc.getCountDistinctDescriptor(target_idx);
623  CHECK(count_distinct_desc.impl_type_ != CountDistinctImplType::Invalid);
624  if (count_distinct_desc.impl_type_ == CountDistinctImplType::Bitmap) {
625  const auto bitmap_byte_sz = count_distinct_desc.bitmapPaddedSizeBytes();
626  if (deferred) {
627  agg_bitmap_size[agg_col_idx] = bitmap_byte_sz;
628  } else {
629  init_agg_vals_[agg_col_idx] = allocateCountDistinctBitmap(bitmap_byte_sz);
630  }
631  } else {
632  CHECK(count_distinct_desc.impl_type_ == CountDistinctImplType::StdSet);
633  if (deferred) {
634  agg_bitmap_size[agg_col_idx] = -1;
635  } else {
636  init_agg_vals_[agg_col_idx] = allocateCountDistinctSet();
637  }
638  }
639  }
640  }
641 
642  return agg_bitmap_size;
643 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const int64_t const uint32_t const uint32_t const uint32_t agg_col_count
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
#define CHECK_GE(x, y)
Definition: Logger.h:210
CHECK(cgen_state)
std::vector< int64_t > init_agg_vals_
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:129
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqldefs.h:76
const int8_t getSlotIndexForSingleSlotCol(const size_t col_idx) const
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::allocateCountDistinctGpuMem ( const QueryMemoryDescriptor &  query_mem_desc)
private

Definition at line 567 of file QueryMemoryInitializer.cpp.

References Allocator::alloc(), Bitmap, CHECK(), count_distinct_bitmap_crt_ptr_, count_distinct_bitmap_host_mem_, count_distinct_bitmap_mem_, count_distinct_bitmap_mem_bytes_, QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty(), device_allocator_, QueryMemoryDescriptor::getCountDistinctDescriptor(), QueryMemoryDescriptor::getCountDistinctDescriptorsSize(), QueryMemoryDescriptor::getEntryCount(), Invalid, row_set_mem_owner_, and DeviceAllocator::zeroDeviceMem().

Referenced by QueryMemoryInitializer().

568  {
569  if (query_mem_desc.countDistinctDescriptorsLogicallyEmpty()) {
570  return;
571  }
573 
574  size_t total_bytes_per_entry{0};
575  const size_t num_count_distinct_descs =
576  query_mem_desc.getCountDistinctDescriptorsSize();
577  for (size_t i = 0; i < num_count_distinct_descs; i++) {
578  const auto count_distinct_desc = query_mem_desc.getCountDistinctDescriptor(i);
579  if (count_distinct_desc.impl_type_ == CountDistinctImplType::Invalid) {
580  continue;
581  }
582  CHECK(count_distinct_desc.impl_type_ == CountDistinctImplType::Bitmap);
583  total_bytes_per_entry += count_distinct_desc.bitmapPaddedSizeBytes();
584  }
585 
587  total_bytes_per_entry * query_mem_desc.getEntryCount();
588  count_distinct_bitmap_mem_ = reinterpret_cast<CUdeviceptr>(
590  device_allocator_->zeroDeviceMem(reinterpret_cast<int8_t*>(count_distinct_bitmap_mem_),
592 
595 }
bool countDistinctDescriptorsLogicallyEmpty() const
DeviceAllocator * device_allocator_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
virtual int8_t * alloc(const size_t num_bytes)=0
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
CHECK(cgen_state)
virtual void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes) const =0
size_t getCountDistinctDescriptorsSize() const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryInitializer::allocateCountDistinctSet ( )
private

Definition at line 658 of file QueryMemoryInitializer.cpp.

References row_set_mem_owner_.

Referenced by allocateCountDistinctBuffers(), and initColumnPerRow().

658  {
659  auto count_distinct_set = new std::set<int64_t>();
660  row_set_mem_owner_->addCountDistinctSet(count_distinct_set);
661  return reinterpret_cast<int64_t>(count_distinct_set);
662 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_

+ Here is the caller graph for this function:

void QueryMemoryInitializer::applyStreamingTopNOffsetCpu ( const QueryMemoryDescriptor &  query_mem_desc,
const RelAlgExecutionUnit &  ra_exe_unit 
)
private

Definition at line 945 of file QueryMemoryInitializer.cpp.

References CHECK_EQ, CPU, streaming_top_n::get_rows_copy_from_heaps(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), group_by_buffers_, SortInfo::limit, SortInfo::offset, and RelAlgExecutionUnit::sort_info.

947  {
948  CHECK_EQ(group_by_buffers_.size(), size_t(1));
949 
950  const auto rows_copy = streaming_top_n::get_rows_copy_from_heaps(
952  query_mem_desc.getBufferSizeBytes(ra_exe_unit, 1, ExecutorDeviceType::CPU),
953  ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit,
954  1);
955  CHECK_EQ(rows_copy.size(),
956  query_mem_desc.getEntryCount() * query_mem_desc.getRowSize());
957  memcpy(group_by_buffers_[0], &rows_copy[0], rows_copy.size());
958 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
const size_t limit
const SortInfo sort_info
std::vector< int64_t * > group_by_buffers_
std::vector< int8_t > get_rows_copy_from_heaps(const int64_t *heaps, const size_t heaps_size, const size_t n, const size_t thread_count)
const size_t offset

+ Here is the call graph for this function:

void QueryMemoryInitializer::applyStreamingTopNOffsetGpu ( Data_Namespace::DataMgr *  data_mgr,
const QueryMemoryDescriptor &  query_mem_desc,
const GpuGroupByBuffers &  gpu_group_by_buffers,
const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  total_thread_count,
const int  device_id 
)
private

Definition at line 960 of file QueryMemoryInitializer.cpp.

References CHECK_EQ, QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), group_by_buffers_, num_buffers_, GpuGroupByBuffers::second, and UNREACHABLE.

966  {
967 #ifdef HAVE_CUDA
969 
970  const auto rows_copy = pick_top_n_rows_from_dev_heaps(
971  data_mgr,
972  reinterpret_cast<int64_t*>(gpu_group_by_buffers.second),
973  ra_exe_unit,
974  query_mem_desc,
975  total_thread_count,
976  device_id);
977  CHECK_EQ(
978  rows_copy.size(),
979  static_cast<size_t>(query_mem_desc.getEntryCount() * query_mem_desc.getRowSize()));
980  memcpy(group_by_buffers_[0], &rows_copy[0], rows_copy.size());
981 #else
982  UNREACHABLE();
983 #endif
984 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define UNREACHABLE()
Definition: Logger.h:241
CUdeviceptr second
Definition: GpuMemUtils.h:61
std::vector< int64_t * > group_by_buffers_

+ Here is the call graph for this function:

void QueryMemoryInitializer::compactProjectionBuffersCpu ( const QueryMemoryDescriptor &  query_mem_desc,
const size_t  projection_count 
)
private

Definition at line 872 of file QueryMemoryInitializer.cpp.

References CHECK(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryDescriptor::getEntryCount(), group_by_buffers_, and result_sets_.

874  {
875  const auto num_allocated_rows =
876  std::min(projection_count, query_mem_desc.getEntryCount());
877 
878  // copy the results from the main buffer into projection_buffer
880  query_mem_desc,
881  reinterpret_cast<int8_t*>(group_by_buffers_[0]),
882  num_allocated_rows);
883 
884  // update the entry count for the result set, and its underlying storage
885  CHECK(!result_sets_.empty());
886  result_sets_.front()->updateStorageEntryCount(num_allocated_rows);
887 }
void compact_projection_buffer_for_cpu_columnar(const QueryMemoryDescriptor &query_mem_desc, int8_t *projection_buffer, const size_t projection_count)
CHECK(cgen_state)
std::vector< int64_t * > group_by_buffers_
std::vector< std::unique_ptr< ResultSet > > result_sets_

+ Here is the call graph for this function:

void QueryMemoryInitializer::compactProjectionBuffersGpu ( const QueryMemoryDescriptor &  query_mem_desc,
Data_Namespace::DataMgr *  data_mgr,
const GpuGroupByBuffers &  gpu_group_by_buffers,
const size_t  projection_count,
const int  device_id 
)
private

Definition at line 889 of file QueryMemoryInitializer.cpp.

References CHECK(), copy_projection_buffer_from_gpu_columnar(), QueryMemoryDescriptor::getEntryCount(), group_by_buffers_, and result_sets_.

894  {
895  // store total number of allocated rows:
896  const auto num_allocated_rows =
897  std::min(projection_count, query_mem_desc.getEntryCount());
898 
899  // copy the results from the main buffer into projection_buffer
901  data_mgr,
902  gpu_group_by_buffers,
903  query_mem_desc,
904  reinterpret_cast<int8_t*>(group_by_buffers_[0]),
905  num_allocated_rows,
906  device_id);
907 
908  // update the entry count for the result set, and its underlying storage
909  CHECK(!result_sets_.empty());
910  result_sets_.front()->updateStorageEntryCount(num_allocated_rows);
911 }
CHECK(cgen_state)
std::vector< int64_t * > group_by_buffers_
void copy_projection_buffer_from_gpu_columnar(Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const QueryMemoryDescriptor &query_mem_desc, int8_t *projection_buffer, const size_t projection_count, const int device_id)
std::vector< std::unique_ptr< ResultSet > > result_sets_

+ Here is the call graph for this function:

size_t QueryMemoryInitializer::computeNumberOfBuffers ( const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type,
const Executor *  executor 
) const
private

Definition at line 828 of file QueryMemoryInitializer.cpp.

References QueryMemoryDescriptor::blocksShareMemory(), and CPU.

831  {
832  return device_type == ExecutorDeviceType::CPU
833  ? 1
834  : executor->blockSize() *
835  (query_mem_desc.blocksShareMemory() ? 1 : executor->gridSize());
836 }

+ Here is the call graph for this function:

void QueryMemoryInitializer::copyGroupByBuffersFromGpu ( Data_Namespace::DataMgr *  data_mgr,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  entry_count,
const GpuGroupByBuffers &  gpu_group_by_buffers,
const RelAlgExecutionUnit *  ra_exe_unit,
const unsigned  block_size_x,
const unsigned  grid_size_x,
const int  device_id,
const bool  prepend_index_buffer 
) const

Definition at line 913 of file QueryMemoryInitializer.cpp.

References copy_group_by_buffers_from_gpu(), streaming_top_n::get_heap_size(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getRowSize(), GPU, group_by_buffers_, SortInfo::limit, SortInfo::offset, GpuGroupByBuffers::second, RelAlgExecutionUnit::sort_info, and QueryMemoryDescriptor::useStreamingTopN().

922  {
923  const auto thread_count = block_size_x * grid_size_x;
924 
925  size_t total_buff_size{0};
926  if (ra_exe_unit && query_mem_desc.useStreamingTopN()) {
927  const size_t n = ra_exe_unit->sort_info.offset + ra_exe_unit->sort_info.limit;
928  total_buff_size =
929  streaming_top_n::get_heap_size(query_mem_desc.getRowSize(), n, thread_count);
930  } else {
931  total_buff_size =
932  query_mem_desc.getBufferSizeBytes(ExecutorDeviceType::GPU, entry_count);
933  }
936  total_buff_size,
937  gpu_group_by_buffers.second,
938  query_mem_desc,
939  block_size_x,
940  grid_size_x,
941  device_id,
942  prepend_index_buffer);
943 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
const size_t limit
CUdeviceptr second
Definition: GpuMemUtils.h:61
const SortInfo sort_info
std::vector< int64_t * > group_by_buffers_
void copy_group_by_buffers_from_gpu(Data_Namespace::DataMgr *data_mgr, const std::vector< int64_t * > &group_by_buffers, const size_t groups_buffer_size, const CUdeviceptr group_by_dev_buffers_mem, const QueryMemoryDescriptor &query_mem_desc, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer)
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
const size_t offset

+ Here is the call graph for this function:

int64_t QueryMemoryInitializer::getAggInitValForIndex ( const size_t  index) const
inline

Definition at line 89 of file QueryMemoryInitializer.h.

References CHECK_LT, and init_agg_vals_.

89  {
90  CHECK_LT(index, init_agg_vals_.size());
91  return init_agg_vals_[index];
92  }
std::vector< int64_t > init_agg_vals_
#define CHECK_LT(x, y)
Definition: Logger.h:207
const auto QueryMemoryInitializer::getCountDistinctBitmapBytes ( ) const
inline

Definition at line 70 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_mem_bytes_.

70  {
72  }
const auto QueryMemoryInitializer::getCountDistinctBitmapPtr ( ) const
inline

Definition at line 66 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_mem_.

const auto QueryMemoryInitializer::getCountDistinctHostPtr ( ) const
inline

Definition at line 68 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_host_mem_.

const auto QueryMemoryInitializer::getGroupByBuffersPtr ( )
inline

Definition at line 94 of file QueryMemoryInitializer.h.

References group_by_buffers_.

94  {
95  return reinterpret_cast<int64_t**>(group_by_buffers_.data());
96  }
std::vector< int64_t * > group_by_buffers_
const auto QueryMemoryInitializer::getGroupByBuffersSize ( ) const
inline

Definition at line 98 of file QueryMemoryInitializer.h.

References group_by_buffers_.

98 { return group_by_buffers_.size(); }
std::vector< int64_t * > group_by_buffers_
const auto QueryMemoryInitializer::getNumBuffers ( ) const
inline

Definition at line 100 of file QueryMemoryInitializer.h.

References CHECK_EQ, group_by_buffers_, and num_buffers_.

100  {
102  return num_buffers_;
103  }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int64_t * > group_by_buffers_
ResultSet* QueryMemoryInitializer::getResultSet ( const size_t  index) const
inline

Definition at line 74 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

74  {
75  CHECK_LT(index, result_sets_.size());
76  return result_sets_[index].get();
77  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< std::unique_ptr< ResultSet > > result_sets_
std::unique_ptr<ResultSet> QueryMemoryInitializer::getResultSetOwned ( const size_t  index)
inline

Definition at line 79 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

79  {
80  CHECK_LT(index, result_sets_.size());
81  return std::move(result_sets_[index]);
82  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< std::unique_ptr< ResultSet > > result_sets_
void QueryMemoryInitializer::initColumnarGroups ( const QueryMemoryDescriptor query_mem_desc,
int64_t *  groups_buffer,
const std::vector< int64_t > &  init_vals,
const Executor executor 
)
private

Definition at line 459 of file QueryMemoryInitializer.cpp.

References agg_col_count, align_to_int64(), CHECK(), CHECK_LT, EMPTY_KEY_64, g_bigint_count, get_target_info(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getSlotCount(), groups_buffer, groups_buffer_entry_count, QueryMemoryDescriptor::hasKeylessHash(), is_distinct_target(), and Projection.

Referenced by initGroupByBuffer().

463  {
465  for (const auto target_expr : executor->plan_state_->target_exprs_) {
466  const auto agg_info = get_target_info(target_expr, g_bigint_count);
467  CHECK(!is_distinct_target(agg_info));
468  }
469  const int32_t agg_col_count = query_mem_desc.getSlotCount();
470  auto buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
471 
472  const auto groups_buffer_entry_count = query_mem_desc.getEntryCount();
473  if (!query_mem_desc.hasKeylessHash()) {
474  const size_t key_count{query_mem_desc.getGroupbyColCount()};
475  for (size_t i = 0; i < key_count; ++i) {
476  buffer_ptr = initColumnarBuffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
477  EMPTY_KEY_64,
478  groups_buffer_entry_count);
479  }
480  }
481 
483  // initializing all aggregate columns:
484  int32_t init_val_idx = 0;
485  for (int32_t i = 0; i < agg_col_count; ++i) {
486  if (query_mem_desc.getPaddedSlotWidthBytes(i) > 0) {
487  CHECK_LT(static_cast<size_t>(init_val_idx), init_vals.size());
488  switch (query_mem_desc.getPaddedSlotWidthBytes(i)) {
489  case 1:
490  buffer_ptr = initColumnarBuffer<int8_t>(
491  buffer_ptr, init_vals[init_val_idx++], groups_buffer_entry_count);
492  break;
493  case 2:
494  buffer_ptr =
495  initColumnarBuffer<int16_t>(reinterpret_cast<int16_t*>(buffer_ptr),
496  init_vals[init_val_idx++],
497  groups_buffer_entry_count);
498  break;
499  case 4:
500  buffer_ptr =
501  initColumnarBuffer<int32_t>(reinterpret_cast<int32_t*>(buffer_ptr),
502  init_vals[init_val_idx++],
503  groups_buffer_entry_count);
504  break;
505  case 8:
506  buffer_ptr =
507  initColumnarBuffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
508  init_vals[init_val_idx++],
509  groups_buffer_entry_count);
510  break;
511  case 0:
512  break;
513  default:
514  CHECK(false);
515  }
516 
517  buffer_ptr = align_to_int64(buffer_ptr);
518  }
519  }
520  }
521 }
const int32_t groups_buffer_size return groups_buffer
const int64_t const uint32_t const uint32_t const uint32_t agg_col_count
#define EMPTY_KEY_64
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
const int64_t const uint32_t groups_buffer_entry_count
CHECK(cgen_state)
size_t getGroupbyColCount() const
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:129
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType getQueryDescriptionType() const
#define CHECK_LT(x, y)
Definition: Logger.h:207
const int64_t * init_vals
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::initColumnPerRow ( const QueryMemoryDescriptor query_mem_desc,
int8_t *  row_ptr,
const size_t  bin,
const std::vector< int64_t > &  init_vals,
const std::vector< ssize_t > &  bitmap_sizes 
)
private

Definition at line 523 of file QueryMemoryInitializer.cpp.

References allocateCountDistinctBitmap(), allocateCountDistinctSet(), CHECK(), CHECK_EQ, CHECK_LT, QueryMemoryDescriptor::getNextColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), and QueryMemoryDescriptor::isGroupBy().

Referenced by initGroups().

527  {
528  int8_t* col_ptr = row_ptr;
529  size_t init_vec_idx = 0;
530  for (size_t col_idx = 0; col_idx < query_mem_desc.getSlotCount();
531  col_ptr += query_mem_desc.getNextColOffInBytes(col_ptr, bin, col_idx++)) {
532  const ssize_t bm_sz{bitmap_sizes[col_idx]};
533  int64_t init_val{0};
534  if (!bm_sz || !query_mem_desc.isGroupBy()) {
535  if (query_mem_desc.getPaddedSlotWidthBytes(col_idx) > 0) {
536  CHECK_LT(init_vec_idx, init_vals.size());
537  init_val = init_vals[init_vec_idx++];
538  }
539  } else {
540  CHECK_EQ(static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(col_idx)),
541  sizeof(int64_t));
542  init_val =
543  bm_sz > 0 ? allocateCountDistinctBitmap(bm_sz) : allocateCountDistinctSet();
544  ++init_vec_idx;
545  }
546  switch (query_mem_desc.getPaddedSlotWidthBytes(col_idx)) {
547  case 1:
548  *col_ptr = static_cast<int8_t>(init_val);
549  break;
550  case 2:
551  *reinterpret_cast<int16_t*>(col_ptr) = (int16_t)init_val;
552  break;
553  case 4:
554  *reinterpret_cast<int32_t*>(col_ptr) = (int32_t)init_val;
555  break;
556  case 8:
557  *reinterpret_cast<int64_t*>(col_ptr) = init_val;
558  break;
559  case 0:
560  continue;
561  default:
562  CHECK(false);
563  }
564  }
565 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t getNextColOffInBytes(const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
CHECK(cgen_state)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:207
const int64_t * init_vals
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::initGroupByBuffer ( int64_t *  buffer,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
const bool  output_columnar,
const Executor executor 
)
private

Definition at line 367 of file QueryMemoryInitializer.cpp.

References streaming_top_n::get_rows_offset_of_heaps(), QueryMemoryDescriptor::getEntryCount(), GPU, init_agg_vals_, initColumnarGroups(), initGroups(), QueryMemoryDescriptor::interleavedBins(), SortInfo::limit, SortInfo::offset, RelAlgExecutionUnit::sort_info, and QueryMemoryDescriptor::useStreamingTopN().

Referenced by QueryMemoryInitializer().

373  {
374  if (output_columnar) {
375  initColumnarGroups(query_mem_desc, buffer, init_agg_vals_, executor);
376  } else {
377  auto rows_ptr = buffer;
378  auto actual_entry_count = query_mem_desc.getEntryCount();
379  const auto thread_count = device_type == ExecutorDeviceType::GPU
380  ? executor->blockSize() * executor->gridSize()
381  : 1;
382  auto warp_size =
383  query_mem_desc.interleavedBins(device_type) ? executor->warpSize() : 1;
384  if (query_mem_desc.useStreamingTopN()) {
385  const auto node_count_size = thread_count * sizeof(int64_t);
386  memset(rows_ptr, 0, node_count_size);
387  const auto n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
388  const auto rows_offset = streaming_top_n::get_rows_offset_of_heaps(n, thread_count);
389  memset(rows_ptr + thread_count, -1, rows_offset - node_count_size);
390  rows_ptr += rows_offset / sizeof(int64_t);
391  actual_entry_count = n * thread_count;
392  warp_size = 1;
393  }
394  initGroups(query_mem_desc,
395  rows_ptr,
396  init_agg_vals_,
397  actual_entry_count,
398  warp_size,
399  executor);
400  }
401 }
void initGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)
size_t get_rows_offset_of_heaps(const size_t n, const size_t thread_count)
const size_t limit
std::vector< int64_t > init_agg_vals_
const SortInfo sort_info
void initColumnarGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
bool interleavedBins(const ExecutorDeviceType) const
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::initGroups ( const QueryMemoryDescriptor query_mem_desc,
int64_t *  groups_buffer,
const std::vector< int64_t > &  init_vals,
const int32_t  groups_buffer_entry_count,
const size_t  warp_size,
const Executor executor 
)
private

Definition at line 403 of file QueryMemoryInitializer.cpp.

References allocateCountDistinctBuffers(), CHECK(), fill_empty_key(), ResultSet::fixupQueryMemoryDescriptor(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getRowSize(), groups_buffer, groups_buffer_entry_count, QueryMemoryDescriptor::hasKeylessHash(), and initColumnPerRow().

Referenced by initGroupByBuffer().

408  {
409  const size_t key_count{query_mem_desc.getGroupbyColCount()};
410  const size_t row_size{query_mem_desc.getRowSize()};
411  const size_t col_base_off{query_mem_desc.getColOffInBytes(0)};
412 
413  auto agg_bitmap_size = allocateCountDistinctBuffers(query_mem_desc, true, executor);
414  auto buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
415 
416  const auto query_mem_desc_fixedup =
417  ResultSet::fixupQueryMemoryDescriptor(query_mem_desc);
418 
419  if (query_mem_desc.hasKeylessHash()) {
420  CHECK(warp_size >= 1);
421  CHECK(key_count == 1 || warp_size == 1);
422  for (size_t warp_idx = 0; warp_idx < warp_size; ++warp_idx) {
423  for (size_t bin = 0; bin < static_cast<size_t>(groups_buffer_entry_count);
424  ++bin, buffer_ptr += row_size) {
425  initColumnPerRow(query_mem_desc_fixedup,
426  &buffer_ptr[col_base_off],
427  bin,
428  init_vals,
429  agg_bitmap_size);
430  }
431  }
432  return;
433  }
434 
435  for (size_t bin = 0; bin < static_cast<size_t>(groups_buffer_entry_count);
436  ++bin, buffer_ptr += row_size) {
437  fill_empty_key(buffer_ptr, key_count, query_mem_desc.getEffectiveKeyWidth());
438  initColumnPerRow(query_mem_desc_fixedup,
439  &buffer_ptr[col_base_off],
440  bin,
441  init_vals,
442  agg_bitmap_size);
443  }
444 }
std::vector< ssize_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
const int32_t groups_buffer_size return groups_buffer
void fill_empty_key(void *key_ptr, const size_t key_count, const size_t key_width)
size_t getEffectiveKeyWidth() const
const int64_t const uint32_t groups_buffer_entry_count
CHECK(cgen_state)
size_t getGroupbyColCount() const
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:506
const int64_t * init_vals
size_t getColOffInBytes(const size_t col_idx) const
void initColumnPerRow(const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const size_t bin, const std::vector< int64_t > &init_vals, const std::vector< ssize_t > &bitmap_sizes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::resetResultSet ( const size_t  index)
inline

Definition at line 84 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

84  {
85  CHECK_LT(index, result_sets_.size());
86  result_sets_[index].reset();
87  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< std::unique_ptr< ResultSet > > result_sets_

Friends And Related Function Documentation

friend class Executor
friend

Definition at line 222 of file QueryMemoryInitializer.h.

friend class QueryExecutionContext
friend

Definition at line 223 of file QueryMemoryInitializer.h.

Member Data Documentation

int8_t* QueryMemoryInitializer::count_distinct_bitmap_crt_ptr_
private
int8_t* QueryMemoryInitializer::count_distinct_bitmap_host_mem_
private
CUdeviceptr QueryMemoryInitializer::count_distinct_bitmap_mem_
private
size_t QueryMemoryInitializer::count_distinct_bitmap_mem_bytes_
private
DeviceAllocator* QueryMemoryInitializer::device_allocator_ {nullptr}
private

Definition at line 219 of file QueryMemoryInitializer.h.

Referenced by allocateCountDistinctGpuMem().

std::vector<int64_t> QueryMemoryInitializer::init_agg_vals_
private
const size_t QueryMemoryInitializer::num_buffers_
private
const int64_t QueryMemoryInitializer::num_rows_
private

Definition at line 204 of file QueryMemoryInitializer.h.

std::vector<std::unique_ptr<ResultSet> > QueryMemoryInitializer::result_sets_
private
std::shared_ptr<RowSetMemoryOwner> QueryMemoryInitializer::row_set_mem_owner_
private
std::vector<Data_Namespace::AbstractBuffer*> QueryMemoryInitializer::temporary_buffers_
private

Definition at line 220 of file QueryMemoryInitializer.h.


The documentation for this class was generated from the following files: