OmniSciDB  c07336695a
QueryMemoryInitializer Class Reference

#include <QueryMemoryInitializer.h>

+ Collaboration diagram for QueryMemoryInitializer:

Public Member Functions

 QueryMemoryInitializer (const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const bool output_columnar, const bool sort_on_gpu, const int64_t num_rows, const std::vector< std::vector< const int8_t *>> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, RenderAllocatorMap *render_allocator_map, RenderInfo *render_info, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *gpu_allocator, const Executor *executor)
 
const auto getCountDistinctBitmapPtr () const
 
const auto getCountDistinctHostPtr () const
 
const auto getCountDistinctBitmapBytes () const
 
ResultSet * getResultSet (const size_t index) const
 
std::unique_ptr< ResultSet > getResultSetOwned (const size_t index)
 
void resetResultSet (const size_t index)
 
int64_t getAggInitValForIndex (const size_t index) const
 
const auto getGroupByBuffersPtr ()
 
const auto getGroupByBuffersSize () const
 
const auto getNumBuffers () const
 

Private Member Functions

void initGroups (const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)
 
void initColumnarGroups (const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
 
void initColumnPerRow (const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const size_t bin, const std::vector< int64_t > &init_vals, const std::vector< ssize_t > &bitmap_sizes)
 
void allocateCountDistinctGpuMem (const QueryMemoryDescriptor &query_mem_desc)
 
std::vector< ssize_t > allocateCountDistinctBuffers (const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
 
int64_t allocateCountDistinctBitmap (const size_t bitmap_byte_sz)
 
int64_t allocateCountDistinctSet ()
 
size_t computeNumberOfBuffers (const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
 
void compactProjectionBuffersCpu (const QueryMemoryDescriptor &query_mem_desc, const size_t projection_count)
 
void compactProjectionBuffersGpu (const QueryMemoryDescriptor &query_mem_desc, Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const size_t projection_count, const int device_id)
 
void copyGroupByBuffersFromGpu (Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit &ra_exe_unit, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer) const
 
void applyStreamingTopNOffsetCpu (const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
 
void applyStreamingTopNOffsetGpu (Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit &ra_exe_unit, const unsigned total_thread_count, const int device_id)
 

Private Attributes

const int64_t num_rows_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
std::vector< std::unique_ptr< ResultSet > > result_sets_
 
std::vector< int64_t > init_agg_vals_
 
const size_t num_buffers_
 
std::vector< int64_t * > group_by_buffers_
 
CUdeviceptr count_distinct_bitmap_mem_
 
size_t count_distinct_bitmap_mem_bytes_
 
int8_t * count_distinct_bitmap_crt_ptr_
 
int8_t * count_distinct_bitmap_host_mem_
 
DeviceAllocator * device_allocator_ {nullptr}
 

Friends

class Executor
 
class QueryExecutionContext
 

Detailed Description

Definition at line 35 of file QueryMemoryInitializer.h.

Constructor & Destructor Documentation

◆ QueryMemoryInitializer()

QueryMemoryInitializer::QueryMemoryInitializer ( const RelAlgExecutionUnit &  ra_exe_unit,
const QueryMemoryDescriptor &  query_mem_desc,
const int  device_id,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const bool  output_columnar,
const bool  sort_on_gpu,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t *>> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
RenderAllocatorMap *  render_allocator_map,
RenderInfo *  render_info,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
DeviceAllocator *  gpu_allocator,
const Executor *  executor 
)

Definition at line 151 of file QueryMemoryInitializer.cpp.

References anonymous_namespace{QueryMemoryInitializer.cpp}::alloc_group_by_buffer(), allocateCountDistinctBuffers(), allocateCountDistinctGpuMem(), CHECK, CHECK_GE, anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), checked_malloc(), QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, ResultSet::fixupQueryMemoryDescriptor(), g_max_memory_allocation_size, anonymous_namespace{QueryMemoryInitializer.cpp}::get_col_frag_offsets(), anonymous_namespace{QueryMemoryInitializer.cpp}::get_consistent_frags_sizes(), streaming_top_n::get_rows_offset_of_heaps(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), GPU, group_by_buffers_, QueryMemoryDescriptor::hasKeylessHash(), init_agg_vals_, initColumnarGroups(), initGroups(), QueryMemoryDescriptor::interleavedBins(), QueryMemoryDescriptor::isGroupBy(), KernelPerFragment, QueryMemoryDescriptor::lazyInitGroups(), SortInfo::limit, num_buffers_, SortInfo::offset, result_sets_, row_set_mem_owner_, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, target_exprs_to_infos(), QueryMemoryDescriptor::threadsShareMemory(), RelAlgExecutionUnit::use_bump_allocator, use_streaming_top_n(), RenderInfo::useCudaBuffers(), and warp_size.

168  , row_set_mem_owner_(row_set_mem_owner)
169  , init_agg_vals_(executor->plan_state_->init_agg_vals_)
170  , num_buffers_(computeNumberOfBuffers(query_mem_desc, device_type, executor))
175  , device_allocator_(device_allocator) {
176  CHECK(!sort_on_gpu || output_columnar);
177 
178  const auto& consistent_frag_sizes = get_consistent_frags_sizes(frag_offsets);
179  if (consistent_frag_sizes.empty()) {
180  // No fragments in the input, no underlying buffers will be needed.
181  return;
182  }
183  if (!ra_exe_unit.use_bump_allocator) {
184  check_total_bitmap_memory(query_mem_desc);
185  }
186  if (device_type == ExecutorDeviceType::GPU) {
187  allocateCountDistinctGpuMem(query_mem_desc);
188  }
189 
190  if (render_allocator_map || !query_mem_desc.isGroupBy()) {
191  allocateCountDistinctBuffers(query_mem_desc, false, executor);
192  if (render_info && render_info->useCudaBuffers()) {
193  return;
194  }
195  }
196 
197  if (ra_exe_unit.estimator) {
198  return;
199  }
200 
201  const auto thread_count = device_type == ExecutorDeviceType::GPU
202  ? executor->blockSize() * executor->gridSize()
203  : 1;
204 
205  size_t group_buffer_size{0};
206  if (ra_exe_unit.use_bump_allocator) {
207  // For kernel per fragment execution, just allocate a buffer equivalent to the size of
208  // the fragment
209  if (dispatch_mode == ExecutorDispatchMode::KernelPerFragment) {
210  group_buffer_size = num_rows * query_mem_desc.getRowSize();
211  } else {
212  // otherwise, allocate a GPU buffer equivalent to the maximum GPU allocation size
213  group_buffer_size = g_max_memory_allocation_size / query_mem_desc.getRowSize();
214  }
215  } else {
216  group_buffer_size =
217  query_mem_desc.getBufferSizeBytes(ra_exe_unit, thread_count, device_type);
218  }
219  CHECK_GE(group_buffer_size, size_t(0));
220 
221  std::unique_ptr<int64_t, CheckedAllocDeleter> group_by_buffer_template;
222  if (!query_mem_desc.lazyInitGroups(device_type)) {
223  group_by_buffer_template.reset(
224  static_cast<int64_t*>(checked_malloc(group_buffer_size)));
225 
226  if (output_columnar) {
228  query_mem_desc, group_by_buffer_template.get(), init_agg_vals_, executor);
229  } else {
230  auto rows_ptr = group_by_buffer_template.get();
231  auto actual_entry_count = query_mem_desc.getEntryCount();
232  auto warp_size =
233  query_mem_desc.interleavedBins(device_type) ? executor->warpSize() : 1;
234  if (use_streaming_top_n(ra_exe_unit, query_mem_desc.didOutputColumnar())) {
235  const auto node_count_size = thread_count * sizeof(int64_t);
236  memset(rows_ptr, 0, node_count_size);
237  const auto n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
238  const auto rows_offset =
240  memset(rows_ptr + thread_count, -1, rows_offset - node_count_size);
241  rows_ptr += rows_offset / sizeof(int64_t);
242  actual_entry_count = n * thread_count;
243  warp_size = 1;
244  }
245  initGroups(query_mem_desc,
246  rows_ptr,
248  actual_entry_count,
249  warp_size,
250  executor);
251  }
252  }
253 
254  if (query_mem_desc.interleavedBins(device_type)) {
255  CHECK(query_mem_desc.hasKeylessHash());
256  }
257 
258  const auto step = device_type == ExecutorDeviceType::GPU &&
259  query_mem_desc.threadsShareMemory() &&
260  query_mem_desc.isGroupBy()
261  ? executor->blockSize()
262  : size_t(1);
263  const auto index_buffer_qw = device_type == ExecutorDeviceType::GPU && sort_on_gpu &&
264  query_mem_desc.hasKeylessHash()
265  ? query_mem_desc.getEntryCount()
266  : size_t(0);
267  const auto actual_group_buffer_size =
268  group_buffer_size + index_buffer_qw * sizeof(int64_t);
269  CHECK_GE(actual_group_buffer_size, group_buffer_size);
270  const auto group_buffers_count = !query_mem_desc.isGroupBy() ? 1 : num_buffers_;
271 
272  for (size_t i = 0; i < group_buffers_count; i += step) {
273  auto group_by_buffer =
274  alloc_group_by_buffer(actual_group_buffer_size, render_allocator_map);
275  if (!query_mem_desc.lazyInitGroups(device_type)) {
276  CHECK(group_by_buffer_template);
277  memcpy(group_by_buffer + index_buffer_qw,
278  group_by_buffer_template.get(),
279  group_buffer_size);
280  }
281  if (!render_allocator_map) {
282  row_set_mem_owner_->addGroupByBuffer(group_by_buffer);
283  }
284  group_by_buffers_.push_back(group_by_buffer);
285  for (size_t j = 1; j < step; ++j) {
286  group_by_buffers_.push_back(nullptr);
287  }
288  const auto column_frag_offsets =
289  get_col_frag_offsets(ra_exe_unit.target_exprs, frag_offsets);
290  const auto column_frag_sizes =
291  get_consistent_frags_sizes(ra_exe_unit.target_exprs, consistent_frag_sizes);
292  result_sets_.emplace_back(
293  new ResultSet(target_exprs_to_infos(ra_exe_unit.target_exprs, query_mem_desc),
294  executor->getColLazyFetchInfo(ra_exe_unit.target_exprs),
295  col_buffers,
296  column_frag_offsets,
297  column_frag_sizes,
298  device_type,
299  device_id,
302  executor));
303  result_sets_.back()->allocateStorage(reinterpret_cast<int8_t*>(group_by_buffer),
304  executor->plan_state_->init_agg_vals_);
305  for (size_t j = 1; j < step; ++j) {
306  result_sets_.emplace_back(nullptr);
307  }
308  }
309 }
std::vector< Analyzer::Expr * > target_exprs
void initGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
std::vector< ssize_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
const int8_t const int64_t * num_rows
int64_t * alloc_group_by_buffer(const size_t numBytes, RenderAllocatorMap *render_allocator_map)
const int64_t const uint32_t const uint32_t const uint32_t const bool const int8_t warp_size
DeviceAllocator * device_allocator_
size_t get_rows_offset_of_heaps(const size_t n, const size_t thread_count)
#define CHECK_GE(x, y)
Definition: Logger.h:200
void check_total_bitmap_memory(const QueryMemoryDescriptor &query_mem_desc)
size_t g_max_memory_allocation_size
Definition: Execute.cpp:92
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
bool useCudaBuffers() const
Definition: RenderInfo.cpp:60
const size_t limit
std::vector< std::vector< int64_t > > get_col_frag_offsets(const std::vector< Analyzer::Expr *> &target_exprs, const std::vector< std::vector< uint64_t >> &table_frag_offsets)
std::vector< int64_t > init_agg_vals_
const SortInfo sort_info
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
bool interleavedBins(const ExecutorDeviceType) const
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
const std::shared_ptr< Analyzer::Estimator > estimator
std::vector< int64_t * > group_by_buffers_
void initColumnarGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
std::vector< int64_t > get_consistent_frags_sizes(const std::vector< Analyzer::Expr *> &target_exprs, const std::vector< int64_t > &table_frag_sizes)
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:452
size_t computeNumberOfBuffers(const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr *> &targets, const QueryMemoryDescriptor &query_mem_desc)
#define CHECK(condition)
Definition: Logger.h:187
const size_t offset
std::vector< std::unique_ptr< ResultSet > > result_sets_
bool lazyInitGroups(const ExecutorDeviceType) const
void allocateCountDistinctGpuMem(const QueryMemoryDescriptor &query_mem_desc)
void sort_on_gpu(int64_t *val_buff, int32_t *key_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
+ Here is the call graph for this function:

Member Function Documentation

◆ allocateCountDistinctBitmap()

int64_t QueryMemoryInitializer::allocateCountDistinctBitmap ( const size_t  bitmap_byte_sz)
private

Definition at line 549 of file QueryMemoryInitializer.cpp.

References CHECK, checked_calloc(), count_distinct_bitmap_crt_ptr_, count_distinct_bitmap_host_mem_, and row_set_mem_owner_.

Referenced by allocateCountDistinctBuffers(), getNumBuffers(), and initColumnPerRow().

549  {
553  count_distinct_bitmap_crt_ptr_ += bitmap_byte_sz;
554  row_set_mem_owner_->addCountDistinctBuffer(ptr, bitmap_byte_sz, false);
555  return reinterpret_cast<int64_t>(ptr);
556  }
557  auto count_distinct_buffer = static_cast<int8_t*>(checked_calloc(bitmap_byte_sz, 1));
558  row_set_mem_owner_->addCountDistinctBuffer(count_distinct_buffer, bitmap_byte_sz, true);
559  return reinterpret_cast<int64_t>(count_distinct_buffer);
560 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
void * checked_calloc(const size_t nmemb, const size_t size)
Definition: checked_alloc.h:48
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ allocateCountDistinctBuffers()

std::vector< ssize_t > QueryMemoryInitializer::allocateCountDistinctBuffers ( const QueryMemoryDescriptor &  query_mem_desc,
const bool  deferred,
const Executor *  executor 
)
private

Definition at line 503 of file QueryMemoryInitializer.cpp.

References agg_col_count, allocateCountDistinctBitmap(), allocateCountDistinctSet(), Bitmap, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, g_bigint_count, get_target_info(), QueryMemoryDescriptor::getCountDistinctDescriptor(), QueryMemoryDescriptor::getLogicalSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), QueryMemoryDescriptor::getSlotIndexForSingleSlotCol(), init_agg_vals_, Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, and StdSet.

Referenced by getNumBuffers(), initGroups(), and QueryMemoryInitializer().

506  {
507  const size_t agg_col_count{query_mem_desc.getSlotCount()};
508  std::vector<ssize_t> agg_bitmap_size(deferred ? agg_col_count : 0);
509 
510  CHECK_GE(agg_col_count, executor->plan_state_->target_exprs_.size());
511  for (size_t target_idx = 0; target_idx < executor->plan_state_->target_exprs_.size();
512  ++target_idx) {
513  const auto target_expr = executor->plan_state_->target_exprs_[target_idx];
514  const auto agg_info = get_target_info(target_expr, g_bigint_count);
515  if (is_distinct_target(agg_info)) {
516  CHECK(agg_info.is_agg &&
517  (agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT));
518  CHECK(!agg_info.sql_type.is_varlen());
519 
520  const auto agg_col_idx = query_mem_desc.getSlotIndexForSingleSlotCol(target_idx);
521  CHECK_LT(static_cast<size_t>(agg_col_idx), agg_col_count);
522 
523  CHECK_EQ(static_cast<size_t>(query_mem_desc.getLogicalSlotWidthBytes(agg_col_idx)),
524  sizeof(int64_t));
525  const auto& count_distinct_desc =
526  query_mem_desc.getCountDistinctDescriptor(target_idx);
527  CHECK(count_distinct_desc.impl_type_ != CountDistinctImplType::Invalid);
528  if (count_distinct_desc.impl_type_ == CountDistinctImplType::Bitmap) {
529  const auto bitmap_byte_sz = count_distinct_desc.bitmapPaddedSizeBytes();
530  if (deferred) {
531  agg_bitmap_size[agg_col_idx] = bitmap_byte_sz;
532  } else {
533  init_agg_vals_[agg_col_idx] = allocateCountDistinctBitmap(bitmap_byte_sz);
534  }
535  } else {
536  CHECK(count_distinct_desc.impl_type_ == CountDistinctImplType::StdSet);
537  if (deferred) {
538  agg_bitmap_size[agg_col_idx] = -1;
539  } else {
540  init_agg_vals_[agg_col_idx] = allocateCountDistinctSet();
541  }
542  }
543  }
544  }
545 
546  return agg_bitmap_size;
547 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const int64_t const uint32_t const uint32_t const uint32_t agg_col_count
const int8_t getSlotIndexForSingleSlotCol(const size_t col_idx) const
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:65
#define CHECK_GE(x, y)
Definition: Logger.h:200
std::vector< int64_t > init_agg_vals_
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:116
#define CHECK_LT(x, y)
Definition: Logger.h:197
Definition: sqldefs.h:71
#define CHECK(condition)
Definition: Logger.h:187
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ allocateCountDistinctGpuMem()

void QueryMemoryInitializer::allocateCountDistinctGpuMem ( const QueryMemoryDescriptor &  query_mem_desc)
private

Definition at line 469 of file QueryMemoryInitializer.cpp.

References Allocator::alloc(), Bitmap, CHECK, checked_malloc(), count_distinct_bitmap_crt_ptr_, count_distinct_bitmap_host_mem_, count_distinct_bitmap_mem_, count_distinct_bitmap_mem_bytes_, QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty(), device_allocator_, QueryMemoryDescriptor::getCountDistinctDescriptor(), QueryMemoryDescriptor::getCountDistinctDescriptorsSize(), QueryMemoryDescriptor::getEntryCount(), Invalid, row_set_mem_owner_, and DeviceAllocator::zeroDeviceMem().

Referenced by getNumBuffers(), and QueryMemoryInitializer().

470  {
471  if (query_mem_desc.countDistinctDescriptorsLogicallyEmpty()) {
472  return;
473  }
475 
476  size_t total_bytes_per_entry{0};
477  const size_t num_count_distinct_descs =
478  query_mem_desc.getCountDistinctDescriptorsSize();
479  for (size_t i = 0; i < num_count_distinct_descs; i++) {
480  const auto count_distinct_desc = query_mem_desc.getCountDistinctDescriptor(i);
481  if (count_distinct_desc.impl_type_ == CountDistinctImplType::Invalid) {
482  continue;
483  }
484  CHECK(count_distinct_desc.impl_type_ == CountDistinctImplType::Bitmap);
485  total_bytes_per_entry += count_distinct_desc.bitmapPaddedSizeBytes();
486  }
487 
489  total_bytes_per_entry * query_mem_desc.getEntryCount();
490  count_distinct_bitmap_mem_ = reinterpret_cast<CUdeviceptr>(
492  device_allocator_->zeroDeviceMem(reinterpret_cast<int8_t*>(count_distinct_bitmap_mem_),
494 
496  static_cast<int8_t*>(checked_malloc(count_distinct_bitmap_mem_bytes_));
497  row_set_mem_owner_->addCountDistinctBuffer(
499 }
DeviceAllocator * device_allocator_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
size_t getCountDistinctDescriptorsSize() const
virtual int8_t * alloc(const size_t num_bytes)=0
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
void * checked_malloc(const size_t size)
Definition: checked_alloc.h:40
virtual void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes) const =0
bool countDistinctDescriptorsLogicallyEmpty() const
#define CHECK(condition)
Definition: Logger.h:187
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ allocateCountDistinctSet()

int64_t QueryMemoryInitializer::allocateCountDistinctSet ( )
private

Definition at line 562 of file QueryMemoryInitializer.cpp.

References Allocator::alloc(), CHECK, CHECK_EQ, DeviceAllocator::copyToDevice(), create_dev_group_by_buffers(), device_allocator_, QueryMemoryDescriptor::didOutputColumnar(), streaming_top_n::get_heap_size(), streaming_top_n::get_rows_offset_of_heaps(), RenderAllocator::getAllocatedSize(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getEffectiveKeyWidth(), getGroupByBuffersSize(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getRowSize(), QueryMemoryDescriptor::getSlotCount(), GPU, group_by_buffers_, QueryMemoryDescriptor::groupColWidthsSize(), groups_buffer_size, QueryMemoryDescriptor::hasKeylessHash(), init_columnar_group_by_buffer_on_device(), init_group_by_buffer_on_device(), QueryMemoryDescriptor::interleavedBins(), QueryMemoryDescriptor::lazyInitGroups(), SortInfo::limit, num_rows_, SortInfo::offset, row_set_mem_owner_, DeviceAllocator::setDeviceMem(), RelAlgExecutionUnit::sort_info, QueryMemoryDescriptor::threadsShareMemory(), RelAlgExecutionUnit::use_bump_allocator, use_streaming_top_n(), warp_size, and DeviceAllocator::zeroDeviceMem().

Referenced by allocateCountDistinctBuffers(), getNumBuffers(), and initColumnPerRow().

562  {
563  auto count_distinct_set = new std::set<int64_t>();
564  row_set_mem_owner_->addCountDistinctSet(count_distinct_set);
565  return reinterpret_cast<int64_t>(count_distinct_set);
566 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ applyStreamingTopNOffsetCpu()

void QueryMemoryInitializer::applyStreamingTopNOffsetCpu ( const QueryMemoryDescriptor &  query_mem_desc,
const RelAlgExecutionUnit &  ra_exe_unit 
)
private

Definition at line 829 of file QueryMemoryInitializer.cpp.

References CHECK_EQ, CPU, streaming_top_n::get_rows_copy_from_heaps(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), group_by_buffers_, SortInfo::limit, SortInfo::offset, and RelAlgExecutionUnit::sort_info.

Referenced by getNumBuffers().

831  {
832  CHECK_EQ(group_by_buffers_.size(), size_t(1));
833 
834  const auto rows_copy = streaming_top_n::get_rows_copy_from_heaps(
836  query_mem_desc.getBufferSizeBytes(ra_exe_unit, 1, ExecutorDeviceType::CPU),
837  ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit,
838  1);
839  CHECK_EQ(rows_copy.size(),
840  query_mem_desc.getEntryCount() * query_mem_desc.getRowSize());
841  memcpy(group_by_buffers_[0], &rows_copy[0], rows_copy.size());
842 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const size_t limit
const SortInfo sort_info
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
std::vector< int64_t * > group_by_buffers_
std::vector< int8_t > get_rows_copy_from_heaps(const int64_t *heaps, const size_t heaps_size, const size_t n, const size_t thread_count)
const size_t offset
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ applyStreamingTopNOffsetGpu()

void QueryMemoryInitializer::applyStreamingTopNOffsetGpu ( Data_Namespace::DataMgr *  data_mgr,
const QueryMemoryDescriptor &  query_mem_desc,
const GpuGroupByBuffers &  gpu_group_by_buffers,
const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  total_thread_count,
const int  device_id 
)
private

Definition at line 844 of file QueryMemoryInitializer.cpp.

References CHECK_EQ, QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), group_by_buffers_, num_buffers_, GpuGroupByBuffers::second, and UNREACHABLE.

Referenced by getNumBuffers().

850  {
851 #ifdef HAVE_CUDA
853 
854  const auto rows_copy = pick_top_n_rows_from_dev_heaps(
855  data_mgr,
856  reinterpret_cast<int64_t*>(gpu_group_by_buffers.second),
857  ra_exe_unit,
858  query_mem_desc,
859  total_thread_count,
860  device_id);
861  CHECK_EQ(
862  rows_copy.size(),
863  static_cast<size_t>(query_mem_desc.getEntryCount() * query_mem_desc.getRowSize()));
864  memcpy(group_by_buffers_[0], &rows_copy[0], rows_copy.size());
865 #else
866  UNREACHABLE();
867 #endif
868 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define UNREACHABLE()
Definition: Logger.h:231
CUdeviceptr second
Definition: GpuMemUtils.h:61
std::vector< int64_t * > group_by_buffers_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ compactProjectionBuffersCpu()

void QueryMemoryInitializer::compactProjectionBuffersCpu ( const QueryMemoryDescriptor &  query_mem_desc,
const size_t  projection_count 
)
private

Definition at line 756 of file QueryMemoryInitializer.cpp.

References CHECK, anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryDescriptor::getEntryCount(), group_by_buffers_, and result_sets_.

Referenced by getNumBuffers().

758  {
759  const auto num_allocated_rows =
760  std::min(projection_count, query_mem_desc.getEntryCount());
761 
762  // copy the results from the main buffer into projection_buffer
764  query_mem_desc,
765  reinterpret_cast<int8_t*>(group_by_buffers_[0]),
766  num_allocated_rows);
767 
768  // update the entry count for the result set, and its underlying storage
769  CHECK(!result_sets_.empty());
770  result_sets_.front()->updateStorageEntryCount(num_allocated_rows);
771 }
void compact_projection_buffer_for_cpu_columnar(const QueryMemoryDescriptor &query_mem_desc, int8_t *projection_buffer, const size_t projection_count)
std::vector< int64_t * > group_by_buffers_
#define CHECK(condition)
Definition: Logger.h:187
std::vector< std::unique_ptr< ResultSet > > result_sets_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ compactProjectionBuffersGpu()

void QueryMemoryInitializer::compactProjectionBuffersGpu ( const QueryMemoryDescriptor &  query_mem_desc,
Data_Namespace::DataMgr *  data_mgr,
const GpuGroupByBuffers &  gpu_group_by_buffers,
const size_t  projection_count,
const int  device_id 
)
private

Definition at line 773 of file QueryMemoryInitializer.cpp.

References CHECK, copy_projection_buffer_from_gpu_columnar(), QueryMemoryDescriptor::getEntryCount(), group_by_buffers_, and result_sets_.

Referenced by getNumBuffers().

778  {
779  // store total number of allocated rows:
780  const auto num_allocated_rows =
781  std::min(projection_count, query_mem_desc.getEntryCount());
782 
783  // copy the results from the main buffer into projection_buffer
785  data_mgr,
786  gpu_group_by_buffers,
787  query_mem_desc,
788  reinterpret_cast<int8_t*>(group_by_buffers_[0]),
789  num_allocated_rows,
790  device_id);
791 
792  // update the entry count for the result set, and its underlying storage
793  CHECK(!result_sets_.empty());
794  result_sets_.front()->updateStorageEntryCount(num_allocated_rows);
795 }
std::vector< int64_t * > group_by_buffers_
#define CHECK(condition)
Definition: Logger.h:187
void copy_projection_buffer_from_gpu_columnar(Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const QueryMemoryDescriptor &query_mem_desc, int8_t *projection_buffer, const size_t projection_count, const int device_id)
std::vector< std::unique_ptr< ResultSet > > result_sets_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ computeNumberOfBuffers()

size_t QueryMemoryInitializer::computeNumberOfBuffers ( const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type,
const Executor *  executor 
) const
private

Definition at line 712 of file QueryMemoryInitializer.cpp.

References QueryMemoryDescriptor::blocksShareMemory(), and CPU.

Referenced by getNumBuffers().

715  {
716  return device_type == ExecutorDeviceType::CPU
717  ? 1
718  : executor->blockSize() *
719  (query_mem_desc.blocksShareMemory() ? 1 : executor->gridSize());
720 }
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ copyGroupByBuffersFromGpu()

void QueryMemoryInitializer::copyGroupByBuffersFromGpu ( Data_Namespace::DataMgr *  data_mgr,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  entry_count,
const GpuGroupByBuffers &  gpu_group_by_buffers,
const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  block_size_x,
const unsigned  grid_size_x,
const int  device_id,
const bool  prepend_index_buffer 
) const
private

Definition at line 797 of file QueryMemoryInitializer.cpp.

References copy_group_by_buffers_from_gpu(), QueryMemoryDescriptor::didOutputColumnar(), streaming_top_n::get_heap_size(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getRowSize(), GPU, group_by_buffers_, SortInfo::limit, SortInfo::offset, GpuGroupByBuffers::second, RelAlgExecutionUnit::sort_info, and use_streaming_top_n().

Referenced by getNumBuffers().

806  {
807  const auto thread_count = block_size_x * grid_size_x;
808 
809  size_t total_buff_size{0};
810  if (use_streaming_top_n(ra_exe_unit, query_mem_desc.didOutputColumnar())) {
811  const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
812  total_buff_size =
813  streaming_top_n::get_heap_size(query_mem_desc.getRowSize(), n, thread_count);
814  } else {
815  total_buff_size =
816  query_mem_desc.getBufferSizeBytes(ExecutorDeviceType::GPU, entry_count);
817  }
820  total_buff_size,
821  gpu_group_by_buffers.second,
822  query_mem_desc,
823  block_size_x,
824  grid_size_x,
825  device_id,
826  prepend_index_buffer);
827 }
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
const size_t limit
void copy_group_by_buffers_from_gpu(Data_Namespace::DataMgr *data_mgr, const std::vector< int64_t *> &group_by_buffers, const size_t groups_buffer_size, const CUdeviceptr group_by_dev_buffers_mem, const QueryMemoryDescriptor &query_mem_desc, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer)
CUdeviceptr second
Definition: GpuMemUtils.h:61
const SortInfo sort_info
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
std::vector< int64_t * > group_by_buffers_
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
const size_t offset
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getAggInitValForIndex()

int64_t QueryMemoryInitializer::getAggInitValForIndex ( const size_t  index) const
inline

Definition at line 76 of file QueryMemoryInitializer.h.

References CHECK_LT, and init_agg_vals_.

76  {
77  CHECK_LT(index, init_agg_vals_.size());
78  return init_agg_vals_[index];
79  }
std::vector< int64_t > init_agg_vals_
#define CHECK_LT(x, y)
Definition: Logger.h:197

◆ getCountDistinctBitmapBytes()

const auto QueryMemoryInitializer::getCountDistinctBitmapBytes ( ) const
inline

Definition at line 57 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_mem_bytes_.

57  {
 58  return count_distinct_bitmap_mem_bytes_;
 59  }

◆ getCountDistinctBitmapPtr()

const auto QueryMemoryInitializer::getCountDistinctBitmapPtr ( ) const
inline

Definition at line 53 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_mem_.

◆ getCountDistinctHostPtr()

const auto QueryMemoryInitializer::getCountDistinctHostPtr ( ) const
inline

◆ getGroupByBuffersPtr()

const auto QueryMemoryInitializer::getGroupByBuffersPtr ( )
inline

Definition at line 81 of file QueryMemoryInitializer.h.

References group_by_buffers_.

81  {
82  return reinterpret_cast<int64_t**>(group_by_buffers_.data());
83  }
std::vector< int64_t * > group_by_buffers_

◆ getGroupByBuffersSize()

const auto QueryMemoryInitializer::getGroupByBuffersSize ( ) const
inline

Definition at line 85 of file QueryMemoryInitializer.h.

References group_by_buffers_.

Referenced by allocateCountDistinctSet().

85 { return group_by_buffers_.size(); }
std::vector< int64_t * > group_by_buffers_
+ Here is the caller graph for this function:

◆ getNumBuffers()

const auto QueryMemoryInitializer::getNumBuffers ( ) const
inline

◆ getResultSet()

ResultSet* QueryMemoryInitializer::getResultSet ( const size_t  index) const
inline

Definition at line 61 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

61  {
62  CHECK_LT(index, result_sets_.size());
63  return result_sets_[index].get();
64  }
#define CHECK_LT(x, y)
Definition: Logger.h:197
std::vector< std::unique_ptr< ResultSet > > result_sets_

◆ getResultSetOwned()

std::unique_ptr<ResultSet> QueryMemoryInitializer::getResultSetOwned ( const size_t  index)
inline

Definition at line 66 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

66  {
67  CHECK_LT(index, result_sets_.size());
68  return std::move(result_sets_[index]);
69  }
#define CHECK_LT(x, y)
Definition: Logger.h:197
std::vector< std::unique_ptr< ResultSet > > result_sets_

◆ initColumnarGroups()

void QueryMemoryInitializer::initColumnarGroups ( const QueryMemoryDescriptor query_mem_desc,
int64_t *  groups_buffer,
const std::vector< int64_t > &  init_vals,
const Executor executor 
)
private

Definition at line 367 of file QueryMemoryInitializer.cpp.

References agg_col_count, align_to_int64(), CHECK, CHECK_LT, EMPTY_KEY_64, g_bigint_count, get_target_info(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), QueryMemoryDescriptor::groupColWidthsSize(), groups_buffer_entry_count, QueryMemoryDescriptor::hasKeylessHash(), and is_distinct_target().

Referenced by getNumBuffers(), and QueryMemoryInitializer().

371  {
372  CHECK(groups_buffer);
373  for (const auto target_expr : executor->plan_state_->target_exprs_) {
374  const auto agg_info = get_target_info(target_expr, g_bigint_count);
375  CHECK(!is_distinct_target(agg_info));
376  }
377  const int32_t agg_col_count = query_mem_desc.getSlotCount();
378  auto buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
379 
380  const auto groups_buffer_entry_count = query_mem_desc.getEntryCount();
381  if (!query_mem_desc.hasKeylessHash()) {
382  const size_t key_count{query_mem_desc.groupColWidthsSize()};
383  for (size_t i = 0; i < key_count; ++i) {
384  buffer_ptr = initColumnarBuffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
385  EMPTY_KEY_64,
 386  groups_buffer_entry_count);
 387  }
388  }
389  // initializing all aggregate columns:
390  int32_t init_val_idx = 0;
391  for (int32_t i = 0; i < agg_col_count; ++i) {
392  if (query_mem_desc.getPaddedSlotWidthBytes(i) > 0) {
393  CHECK_LT(static_cast<size_t>(init_val_idx), init_vals.size());
394  switch (query_mem_desc.getPaddedSlotWidthBytes(i)) {
395  case 1:
396  buffer_ptr = initColumnarBuffer<int8_t>(
397  buffer_ptr, init_vals[init_val_idx++], groups_buffer_entry_count);
398  break;
399  case 2:
400  buffer_ptr = initColumnarBuffer<int16_t>(reinterpret_cast<int16_t*>(buffer_ptr),
401  init_vals[init_val_idx++],
 402  groups_buffer_entry_count);
 403  break;
404  case 4:
405  buffer_ptr = initColumnarBuffer<int32_t>(reinterpret_cast<int32_t*>(buffer_ptr),
406  init_vals[init_val_idx++],
 407  groups_buffer_entry_count);
 408  break;
409  case 8:
410  buffer_ptr = initColumnarBuffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
411  init_vals[init_val_idx++],
 412  groups_buffer_entry_count);
 413  break;
414  case 0:
415  break;
416  default:
417  CHECK(false);
418  }
419 
420  buffer_ptr = align_to_int64(buffer_ptr);
421  }
422  }
423 }
const int64_t const uint32_t const uint32_t const uint32_t agg_col_count
#define EMPTY_KEY_64
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:65
const int64_t const uint32_t groups_buffer_entry_count
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:116
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187
const int64_t * init_vals
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ initColumnPerRow()

void QueryMemoryInitializer::initColumnPerRow ( const QueryMemoryDescriptor query_mem_desc,
int8_t *  row_ptr,
const size_t  bin,
const std::vector< int64_t > &  init_vals,
const std::vector< ssize_t > &  bitmap_sizes 
)
private

Definition at line 425 of file QueryMemoryInitializer.cpp.

References allocateCountDistinctBitmap(), allocateCountDistinctSet(), CHECK, CHECK_EQ, CHECK_LT, QueryMemoryDescriptor::getNextColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), and QueryMemoryDescriptor::isGroupBy().

Referenced by getNumBuffers(), and initGroups().

429  {
430  int8_t* col_ptr = row_ptr;
431  size_t init_vec_idx = 0;
432  for (size_t col_idx = 0; col_idx < query_mem_desc.getSlotCount();
433  col_ptr += query_mem_desc.getNextColOffInBytes(col_ptr, bin, col_idx++)) {
434  const ssize_t bm_sz{bitmap_sizes[col_idx]};
435  int64_t init_val{0};
436  if (!bm_sz || !query_mem_desc.isGroupBy()) {
437  if (query_mem_desc.getPaddedSlotWidthBytes(col_idx) > 0) {
438  CHECK_LT(init_vec_idx, init_vals.size());
439  init_val = init_vals[init_vec_idx++];
440  }
441  } else {
442  CHECK_EQ(static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(col_idx)),
443  sizeof(int64_t));
444  init_val =
 445  bm_sz > 0 ? allocateCountDistinctBitmap(bm_sz) : allocateCountDistinctSet();
 446  ++init_vec_idx;
447  }
448  switch (query_mem_desc.getPaddedSlotWidthBytes(col_idx)) {
449  case 1:
450  *col_ptr = static_cast<int8_t>(init_val);
451  break;
452  case 2:
453  *reinterpret_cast<int16_t*>(col_ptr) = (int16_t)init_val;
454  break;
455  case 4:
456  *reinterpret_cast<int32_t*>(col_ptr) = (int32_t)init_val;
457  break;
458  case 8:
459  *reinterpret_cast<int64_t*>(col_ptr) = init_val;
460  break;
461  case 0:
462  continue;
463  default:
464  CHECK(false);
465  }
466  }
467 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
size_t getNextColOffInBytes(const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:197
#define CHECK(condition)
Definition: Logger.h:187
const int64_t * init_vals
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ initGroups()

void QueryMemoryInitializer::initGroups ( const QueryMemoryDescriptor query_mem_desc,
int64_t *  groups_buffer,
const std::vector< int64_t > &  init_vals,
const int32_t  groups_buffer_entry_count,
const size_t  warp_size,
const Executor executor 
)
private

Definition at line 311 of file QueryMemoryInitializer.cpp.

References allocateCountDistinctBuffers(), CHECK, fill_empty_key(), ResultSet::fixupQueryMemoryDescriptor(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getRowSize(), QueryMemoryDescriptor::groupColWidthsSize(), groups_buffer_entry_count, QueryMemoryDescriptor::hasKeylessHash(), initColumnPerRow(), and warp_size.

Referenced by getNumBuffers(), and QueryMemoryInitializer().

316  {
317  const size_t key_count{query_mem_desc.groupColWidthsSize()};
318  const size_t row_size{query_mem_desc.getRowSize()};
319  const size_t col_base_off{query_mem_desc.getColOffInBytes(0)};
320 
321  auto agg_bitmap_size = allocateCountDistinctBuffers(query_mem_desc, true, executor);
322  auto buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
323 
324  const auto query_mem_desc_fixedup =
 325  ResultSet::fixupQueryMemoryDescriptor(query_mem_desc);
 326 
327  if (query_mem_desc.hasKeylessHash()) {
328  CHECK(warp_size >= 1);
329  CHECK(key_count == 1);
330  for (size_t warp_idx = 0; warp_idx < warp_size; ++warp_idx) {
331  for (size_t bin = 0; bin < static_cast<size_t>(groups_buffer_entry_count);
332  ++bin, buffer_ptr += row_size) {
333  initColumnPerRow(query_mem_desc_fixedup,
334  &buffer_ptr[col_base_off],
335  bin,
336  init_vals,
337  agg_bitmap_size);
338  }
339  }
340  return;
341  }
342 
343  for (size_t bin = 0; bin < static_cast<size_t>(groups_buffer_entry_count);
344  ++bin, buffer_ptr += row_size) {
345  fill_empty_key(buffer_ptr, key_count, query_mem_desc.getEffectiveKeyWidth());
346  initColumnPerRow(query_mem_desc_fixedup,
347  &buffer_ptr[col_base_off],
348  bin,
349  init_vals,
350  agg_bitmap_size);
351  }
352 }
std::vector< ssize_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
const int64_t const uint32_t const uint32_t const uint32_t const bool const int8_t warp_size
void fill_empty_key(void *key_ptr, const size_t key_count, const size_t key_width)
const int64_t const uint32_t groups_buffer_entry_count
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:452
#define CHECK(condition)
Definition: Logger.h:187
size_t getColOffInBytes(const size_t col_idx) const
const int64_t * init_vals
size_t getEffectiveKeyWidth() const
void initColumnPerRow(const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const size_t bin, const std::vector< int64_t > &init_vals, const std::vector< ssize_t > &bitmap_sizes)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ resetResultSet()

void QueryMemoryInitializer::resetResultSet ( const size_t  index)
inline

Definition at line 71 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

71  {
72  CHECK_LT(index, result_sets_.size());
73  result_sets_[index].reset();
74  }
#define CHECK_LT(x, y)
Definition: Logger.h:197
std::vector< std::unique_ptr< ResultSet > > result_sets_

Friends And Related Function Documentation

◆ Executor

friend class Executor
friend

Definition at line 193 of file QueryMemoryInitializer.h.

Referenced by getNumBuffers().

◆ QueryExecutionContext

friend class QueryExecutionContext
friend

Definition at line 194 of file QueryMemoryInitializer.h.

Member Data Documentation

◆ count_distinct_bitmap_crt_ptr_

int8_t* QueryMemoryInitializer::count_distinct_bitmap_crt_ptr_
private

◆ count_distinct_bitmap_host_mem_

int8_t* QueryMemoryInitializer::count_distinct_bitmap_host_mem_
private

◆ count_distinct_bitmap_mem_

CUdeviceptr QueryMemoryInitializer::count_distinct_bitmap_mem_
private

◆ count_distinct_bitmap_mem_bytes_

size_t QueryMemoryInitializer::count_distinct_bitmap_mem_bytes_
private

◆ device_allocator_

DeviceAllocator* QueryMemoryInitializer::device_allocator_ {nullptr}
private

◆ group_by_buffers_

◆ init_agg_vals_

std::vector<int64_t> QueryMemoryInitializer::init_agg_vals_
private

◆ num_buffers_

const size_t QueryMemoryInitializer::num_buffers_
private

◆ num_rows_

const int64_t QueryMemoryInitializer::num_rows_
private

Definition at line 176 of file QueryMemoryInitializer.h.

Referenced by allocateCountDistinctSet().

◆ result_sets_

std::vector<std::unique_ptr<ResultSet> > QueryMemoryInitializer::result_sets_
private

◆ row_set_mem_owner_

std::shared_ptr<RowSetMemoryOwner> QueryMemoryInitializer::row_set_mem_owner_
private

The documentation for this class was generated from the following files: