OmniSciDB  95562058bd
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
QueryMemoryInitializer Class Reference

#include <QueryMemoryInitializer.h>

+ Collaboration diagram for QueryMemoryInitializer:

Public Member Functions

 QueryMemoryInitializer (const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const bool output_columnar, const bool sort_on_gpu, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, RenderAllocatorMap *render_allocator_map, RenderInfo *render_info, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *gpu_allocator, const Executor *executor)
 
 QueryMemoryInitializer (const TableFunctionExecutionUnit &exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *device_allocator, const Executor *executor)
 
const auto getCountDistinctBitmapPtr () const
 
const auto getCountDistinctHostPtr () const
 
const auto getCountDistinctBitmapBytes () const
 
ResultSetgetResultSet (const size_t index) const
 
std::unique_ptr< ResultSetgetResultSetOwned (const size_t index)
 
void resetResultSet (const size_t index)
 
int64_t getAggInitValForIndex (const size_t index) const
 
const auto getGroupByBuffersPtr ()
 
const auto getGroupByBuffersSize () const
 
const auto getNumBuffers () const
 
void copyGroupByBuffersFromGpu (Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit *ra_exe_unit, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer) const
 

Private Member Functions

void initGroupByBuffer (int64_t *buffer, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool output_columnar, const Executor *executor)
 
void initGroups (const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)
 
void initColumnarGroups (const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
 
void initColumnPerRow (const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const size_t bin, const std::vector< int64_t > &init_vals, const std::vector< int64_t > &bitmap_sizes)
 
void allocateCountDistinctGpuMem (const QueryMemoryDescriptor &query_mem_desc)
 
std::vector< int64_t > allocateCountDistinctBuffers (const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
 
int64_t allocateCountDistinctBitmap (const size_t bitmap_byte_sz)
 
int64_t allocateCountDistinctSet ()
 
size_t computeNumberOfBuffers (const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
 
void compactProjectionBuffersCpu (const QueryMemoryDescriptor &query_mem_desc, const size_t projection_count)
 
void compactProjectionBuffersGpu (const QueryMemoryDescriptor &query_mem_desc, Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const size_t projection_count, const int device_id)
 
void applyStreamingTopNOffsetCpu (const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
 
void applyStreamingTopNOffsetGpu (Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit &ra_exe_unit, const unsigned total_thread_count, const int device_id)
 

Private Attributes

const int64_t num_rows_
 
std::shared_ptr
< RowSetMemoryOwner
row_set_mem_owner_
 
std::vector< std::unique_ptr
< ResultSet > > 
result_sets_
 
std::vector< int64_t > init_agg_vals_
 
const size_t num_buffers_
 
std::vector< int64_t * > group_by_buffers_
 
CUdeviceptr count_distinct_bitmap_mem_
 
size_t count_distinct_bitmap_mem_bytes_
 
int8_t * count_distinct_bitmap_crt_ptr_
 
int8_t * count_distinct_bitmap_host_mem_
 
DeviceAllocatordevice_allocator_ {nullptr}
 
std::vector
< Data_Namespace::AbstractBuffer * > 
temporary_buffers_
 

Friends

class Executor
 
class QueryExecutionContext
 

Detailed Description

Definition at line 35 of file QueryMemoryInitializer.h.

Constructor & Destructor Documentation

QueryMemoryInitializer::QueryMemoryInitializer ( const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const int  device_id,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const bool  output_columnar,
const bool  sort_on_gpu,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
RenderAllocatorMap render_allocator_map,
RenderInfo render_info,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
DeviceAllocator gpu_allocator,
const Executor executor 
)

Definition at line 153 of file QueryMemoryInitializer.cpp.

References anonymous_namespace{QueryMemoryInitializer.cpp}::alloc_group_by_buffer(), allocateCountDistinctBuffers(), allocateCountDistinctGpuMem(), CHECK, CHECK_GE, anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), RelAlgExecutionUnit::estimator, ResultSet::fixupQueryMemoryDescriptor(), g_max_memory_allocation_size, anonymous_namespace{QueryMemoryInitializer.cpp}::get_col_frag_offsets(), anonymous_namespace{QueryMemoryInitializer.cpp}::get_consistent_frags_sizes(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), GPU, group_by_buffers_, QueryMemoryDescriptor::hasKeylessHash(), initGroupByBuffer(), QueryMemoryDescriptor::interleavedBins(), QueryMemoryDescriptor::isGroupBy(), KernelPerFragment, QueryMemoryDescriptor::lazyInitGroups(), num_buffers_, result_sets_, row_set_mem_owner_, RelAlgExecutionUnit::target_exprs, target_exprs_to_infos(), QueryMemoryDescriptor::threadsShareMemory(), RelAlgExecutionUnit::use_bump_allocator, and RenderInfo::useCudaBuffers().

170  , row_set_mem_owner_(row_set_mem_owner)
171  , init_agg_vals_(executor->plan_state_->init_agg_vals_)
172  , num_buffers_(computeNumberOfBuffers(query_mem_desc, device_type, executor))
177  , device_allocator_(device_allocator) {
178  CHECK(!sort_on_gpu || output_columnar);
179 
180  const auto& consistent_frag_sizes = get_consistent_frags_sizes(frag_offsets);
181  if (consistent_frag_sizes.empty()) {
182  // No fragments in the input, no underlying buffers will be needed.
183  return;
184  }
185  if (!ra_exe_unit.use_bump_allocator) {
186  check_total_bitmap_memory(query_mem_desc);
187  }
188  if (device_type == ExecutorDeviceType::GPU) {
189  allocateCountDistinctGpuMem(query_mem_desc);
190  }
191 
192  if (render_allocator_map || !query_mem_desc.isGroupBy()) {
193  allocateCountDistinctBuffers(query_mem_desc, false, executor);
194  if (render_info && render_info->useCudaBuffers()) {
195  return;
196  }
197  }
198 
199  if (ra_exe_unit.estimator) {
200  return;
201  }
202 
203  const auto thread_count = device_type == ExecutorDeviceType::GPU
204  ? executor->blockSize() * executor->gridSize()
205  : 1;
206 
207  size_t group_buffer_size{0};
208  if (ra_exe_unit.use_bump_allocator) {
209  // For kernel per fragment execution, just allocate a buffer equivalent to the size of
210  // the fragment
211  if (dispatch_mode == ExecutorDispatchMode::KernelPerFragment) {
212  group_buffer_size = num_rows * query_mem_desc.getRowSize();
213  } else {
214  // otherwise, allocate a GPU buffer equivalent to the maximum GPU allocation size
215  group_buffer_size = g_max_memory_allocation_size / query_mem_desc.getRowSize();
216  }
217  } else {
218  group_buffer_size =
219  query_mem_desc.getBufferSizeBytes(ra_exe_unit, thread_count, device_type);
220  }
221  CHECK_GE(group_buffer_size, size_t(0));
222 
223  const auto group_buffers_count = !query_mem_desc.isGroupBy() ? 1 : num_buffers_;
224  int64_t* group_by_buffer_template{nullptr};
225  if (!query_mem_desc.lazyInitGroups(device_type) && group_buffers_count > 1) {
226  group_by_buffer_template =
227  reinterpret_cast<int64_t*>(row_set_mem_owner_->allocate(group_buffer_size));
228  initGroupByBuffer(group_by_buffer_template,
229  ra_exe_unit,
230  query_mem_desc,
231  device_type,
232  output_columnar,
233  executor);
234  }
235 
236  if (query_mem_desc.interleavedBins(device_type)) {
237  CHECK(query_mem_desc.hasKeylessHash());
238  }
239 
240  const auto step = device_type == ExecutorDeviceType::GPU &&
241  query_mem_desc.threadsShareMemory() &&
242  query_mem_desc.isGroupBy()
243  ? executor->blockSize()
244  : size_t(1);
245  const auto index_buffer_qw = device_type == ExecutorDeviceType::GPU && sort_on_gpu &&
246  query_mem_desc.hasKeylessHash()
247  ? query_mem_desc.getEntryCount()
248  : size_t(0);
249  const auto actual_group_buffer_size =
250  group_buffer_size + index_buffer_qw * sizeof(int64_t);
251  CHECK_GE(actual_group_buffer_size, group_buffer_size);
252 
253  for (size_t i = 0; i < group_buffers_count; i += step) {
254  auto group_by_buffer = alloc_group_by_buffer(
255  actual_group_buffer_size, render_allocator_map, row_set_mem_owner_.get());
256  if (!query_mem_desc.lazyInitGroups(device_type)) {
257  if (group_by_buffer_template) {
258  memcpy(group_by_buffer + index_buffer_qw,
259  group_by_buffer_template,
260  group_buffer_size);
261  } else {
262  initGroupByBuffer(group_by_buffer + index_buffer_qw,
263  ra_exe_unit,
264  query_mem_desc,
265  device_type,
266  output_columnar,
267  executor);
268  }
269  }
270  group_by_buffers_.push_back(group_by_buffer);
271  for (size_t j = 1; j < step; ++j) {
272  group_by_buffers_.push_back(nullptr);
273  }
274  const auto column_frag_offsets =
275  get_col_frag_offsets(ra_exe_unit.target_exprs, frag_offsets);
276  const auto column_frag_sizes =
277  get_consistent_frags_sizes(ra_exe_unit.target_exprs, consistent_frag_sizes);
278  result_sets_.emplace_back(
279  new ResultSet(target_exprs_to_infos(ra_exe_unit.target_exprs, query_mem_desc),
280  executor->getColLazyFetchInfo(ra_exe_unit.target_exprs),
281  col_buffers,
282  column_frag_offsets,
283  column_frag_sizes,
284  device_type,
285  device_id,
288  executor));
289  result_sets_.back()->allocateStorage(reinterpret_cast<int8_t*>(group_by_buffer),
290  executor->plan_state_->init_agg_vals_);
291  for (size_t j = 1; j < step; ++j) {
292  result_sets_.emplace_back(nullptr);
293  }
294  }
295 }
std::vector< Analyzer::Expr * > target_exprs
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
bool useCudaBuffers() const
Definition: RenderInfo.cpp:69
const int8_t const int64_t * num_rows
DeviceAllocator * device_allocator_
std::vector< int64_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
#define CHECK_GE(x, y)
Definition: Logger.h:210
void check_total_bitmap_memory(const QueryMemoryDescriptor &query_mem_desc)
size_t computeNumberOfBuffers(const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
std::vector< int64_t > init_agg_vals_
bool lazyInitGroups(const ExecutorDeviceType) const
size_t g_max_memory_allocation_size
Definition: Execute.cpp:100
const std::shared_ptr< Analyzer::Estimator > estimator
void initGroupByBuffer(int64_t *buffer, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool output_columnar, const Executor *executor)
std::vector< int64_t * > group_by_buffers_
std::vector< int64_t > get_consistent_frags_sizes(const std::vector< std::vector< uint64_t >> &frag_offsets)
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:509
bool interleavedBins(const ExecutorDeviceType) const
#define CHECK(condition)
Definition: Logger.h:197
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
std::vector< std::unique_ptr< ResultSet > > result_sets_
int64_t * alloc_group_by_buffer(const size_t numBytes, RenderAllocatorMap *render_allocator_map, RowSetMemoryOwner *mem_owner)
void allocateCountDistinctGpuMem(const QueryMemoryDescriptor &query_mem_desc)
std::vector< std::vector< int64_t > > get_col_frag_offsets(const std::vector< Analyzer::Expr * > &target_exprs, const std::vector< std::vector< uint64_t >> &table_frag_offsets)

+ Here is the call graph for this function:

QueryMemoryInitializer::QueryMemoryInitializer ( const TableFunctionExecutionUnit exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const int  device_id,
const ExecutorDeviceType  device_type,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
DeviceAllocator device_allocator,
const Executor executor 
)

Definition at line 297 of file QueryMemoryInitializer.cpp.

309  , row_set_mem_owner_(row_set_mem_owner)
const int8_t const int64_t * num_rows
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
std::vector< int64_t > init_agg_vals_
std::vector< Analyzer::Expr * > target_exprs
std::vector< int64_t > init_agg_val_vec(const std::vector< TargetInfo > &targets, const QueryMemoryDescriptor &query_mem_desc)

Member Function Documentation

int64_t QueryMemoryInitializer::allocateCountDistinctBitmap ( const size_t  bitmap_byte_sz)
private

Definition at line 643 of file QueryMemoryInitializer.cpp.

References CHECK, count_distinct_bitmap_crt_ptr_, count_distinct_bitmap_host_mem_, and row_set_mem_owner_.

Referenced by allocateCountDistinctBuffers(), and initColumnPerRow().

643  {
647  count_distinct_bitmap_crt_ptr_ += bitmap_byte_sz;
648  row_set_mem_owner_->addCountDistinctBuffer(
649  ptr, bitmap_byte_sz, /*physial_buffer=*/false);
650  return reinterpret_cast<int64_t>(ptr);
651  }
652  return reinterpret_cast<int64_t>(
653  row_set_mem_owner_->allocateCountDistinctBuffer(bitmap_byte_sz));
654 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

std::vector< int64_t > QueryMemoryInitializer::allocateCountDistinctBuffers ( const QueryMemoryDescriptor query_mem_desc,
const bool  deferred,
const Executor executor 
)
private

Definition at line 597 of file QueryMemoryInitializer.cpp.

References agg_col_count, allocateCountDistinctBitmap(), allocateCountDistinctSet(), Bitmap, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, g_bigint_count, get_target_info(), QueryMemoryDescriptor::getCountDistinctDescriptor(), QueryMemoryDescriptor::getLogicalSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), QueryMemoryDescriptor::getSlotIndexForSingleSlotCol(), init_agg_vals_, Invalid, is_distinct_target(), kAPPROX_COUNT_DISTINCT, kCOUNT, and StdSet.

Referenced by initGroups(), and QueryMemoryInitializer().

600  {
601  const size_t agg_col_count{query_mem_desc.getSlotCount()};
602  std::vector<int64_t> agg_bitmap_size(deferred ? agg_col_count : 0);
603 
604  CHECK_GE(agg_col_count, executor->plan_state_->target_exprs_.size());
605  for (size_t target_idx = 0; target_idx < executor->plan_state_->target_exprs_.size();
606  ++target_idx) {
607  const auto target_expr = executor->plan_state_->target_exprs_[target_idx];
608  const auto agg_info = get_target_info(target_expr, g_bigint_count);
609  if (is_distinct_target(agg_info)) {
610  CHECK(agg_info.is_agg &&
611  (agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT));
612  CHECK(!agg_info.sql_type.is_varlen());
613 
614  const auto agg_col_idx = query_mem_desc.getSlotIndexForSingleSlotCol(target_idx);
615  CHECK_LT(static_cast<size_t>(agg_col_idx), agg_col_count);
616 
617  CHECK_EQ(static_cast<size_t>(query_mem_desc.getLogicalSlotWidthBytes(agg_col_idx)),
618  sizeof(int64_t));
619  const auto& count_distinct_desc =
620  query_mem_desc.getCountDistinctDescriptor(target_idx);
621  CHECK(count_distinct_desc.impl_type_ != CountDistinctImplType::Invalid);
622  if (count_distinct_desc.impl_type_ == CountDistinctImplType::Bitmap) {
623  const auto bitmap_byte_sz = count_distinct_desc.bitmapPaddedSizeBytes();
624  if (deferred) {
625  agg_bitmap_size[agg_col_idx] = bitmap_byte_sz;
626  } else {
627  init_agg_vals_[agg_col_idx] = allocateCountDistinctBitmap(bitmap_byte_sz);
628  }
629  } else {
630  CHECK(count_distinct_desc.impl_type_ == CountDistinctImplType::StdSet);
631  if (deferred) {
632  agg_bitmap_size[agg_col_idx] = -1;
633  } else {
634  init_agg_vals_[agg_col_idx] = allocateCountDistinctSet();
635  }
636  }
637  }
638  }
639 
640  return agg_bitmap_size;
641 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const int64_t const uint32_t const uint32_t const uint32_t agg_col_count
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
#define CHECK_GE(x, y)
Definition: Logger.h:210
std::vector< int64_t > init_agg_vals_
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:129
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:207
Definition: sqldefs.h:76
#define CHECK(condition)
Definition: Logger.h:197
const int8_t getSlotIndexForSingleSlotCol(const size_t col_idx) const
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::allocateCountDistinctGpuMem ( const QueryMemoryDescriptor query_mem_desc)
private

Definition at line 565 of file QueryMemoryInitializer.cpp.

References Allocator::alloc(), Bitmap, CHECK, count_distinct_bitmap_crt_ptr_, count_distinct_bitmap_host_mem_, count_distinct_bitmap_mem_, count_distinct_bitmap_mem_bytes_, QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty(), device_allocator_, QueryMemoryDescriptor::getCountDistinctDescriptor(), QueryMemoryDescriptor::getCountDistinctDescriptorsSize(), QueryMemoryDescriptor::getEntryCount(), Invalid, row_set_mem_owner_, and DeviceAllocator::zeroDeviceMem().

Referenced by QueryMemoryInitializer().

566  {
567  if (query_mem_desc.countDistinctDescriptorsLogicallyEmpty()) {
568  return;
569  }
571 
572  size_t total_bytes_per_entry{0};
573  const size_t num_count_distinct_descs =
574  query_mem_desc.getCountDistinctDescriptorsSize();
575  for (size_t i = 0; i < num_count_distinct_descs; i++) {
576  const auto count_distinct_desc = query_mem_desc.getCountDistinctDescriptor(i);
577  if (count_distinct_desc.impl_type_ == CountDistinctImplType::Invalid) {
578  continue;
579  }
580  CHECK(count_distinct_desc.impl_type_ == CountDistinctImplType::Bitmap);
581  total_bytes_per_entry += count_distinct_desc.bitmapPaddedSizeBytes();
582  }
583 
585  total_bytes_per_entry * query_mem_desc.getEntryCount();
586  count_distinct_bitmap_mem_ = reinterpret_cast<CUdeviceptr>(
588  device_allocator_->zeroDeviceMem(reinterpret_cast<int8_t*>(count_distinct_bitmap_mem_),
590 
593 }
bool countDistinctDescriptorsLogicallyEmpty() const
DeviceAllocator * device_allocator_
unsigned long long CUdeviceptr
Definition: nocuda.h:27
virtual int8_t * alloc(const size_t num_bytes)=0
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
virtual void zeroDeviceMem(int8_t *device_ptr, const size_t num_bytes) const =0
size_t getCountDistinctDescriptorsSize() const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryInitializer::allocateCountDistinctSet ( )
private

Definition at line 656 of file QueryMemoryInitializer.cpp.

References row_set_mem_owner_.

Referenced by allocateCountDistinctBuffers(), and initColumnPerRow().

656  {
657  auto count_distinct_set = new std::set<int64_t>();
658  row_set_mem_owner_->addCountDistinctSet(count_distinct_set);
659  return reinterpret_cast<int64_t>(count_distinct_set);
660 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_

+ Here is the caller graph for this function:

void QueryMemoryInitializer::applyStreamingTopNOffsetCpu ( const QueryMemoryDescriptor query_mem_desc,
const RelAlgExecutionUnit ra_exe_unit 
)
private

Definition at line 998 of file QueryMemoryInitializer.cpp.

References CHECK_EQ, CPU, streaming_top_n::get_rows_copy_from_heaps(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), group_by_buffers_, SortInfo::limit, SortInfo::offset, and RelAlgExecutionUnit::sort_info.

1000  {
1001  CHECK_EQ(group_by_buffers_.size(), size_t(1));
1002 
1003  const auto rows_copy = streaming_top_n::get_rows_copy_from_heaps(
1004  group_by_buffers_[0],
1005  query_mem_desc.getBufferSizeBytes(ra_exe_unit, 1, ExecutorDeviceType::CPU),
1006  ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit,
1007  1);
1008  CHECK_EQ(rows_copy.size(),
1009  query_mem_desc.getEntryCount() * query_mem_desc.getRowSize());
1010  memcpy(group_by_buffers_[0], &rows_copy[0], rows_copy.size());
1011 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
const size_t limit
const SortInfo sort_info
std::vector< int64_t * > group_by_buffers_
std::vector< int8_t > get_rows_copy_from_heaps(const int64_t *heaps, const size_t heaps_size, const size_t n, const size_t thread_count)
const size_t offset

+ Here is the call graph for this function:

void QueryMemoryInitializer::applyStreamingTopNOffsetGpu ( Data_Namespace::DataMgr data_mgr,
const QueryMemoryDescriptor query_mem_desc,
const GpuGroupByBuffers gpu_group_by_buffers,
const RelAlgExecutionUnit ra_exe_unit,
const unsigned  total_thread_count,
const int  device_id 
)
private

Definition at line 1013 of file QueryMemoryInitializer.cpp.

References CHECK_EQ, QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getRowSize(), group_by_buffers_, num_buffers_, GpuGroupByBuffers::second, and UNREACHABLE.

1019  {
1020 #ifdef HAVE_CUDA
1022 
1023  const auto rows_copy = pick_top_n_rows_from_dev_heaps(
1024  data_mgr,
1025  reinterpret_cast<int64_t*>(gpu_group_by_buffers.second),
1026  ra_exe_unit,
1027  query_mem_desc,
1028  total_thread_count,
1029  device_id);
1030  CHECK_EQ(
1031  rows_copy.size(),
1032  static_cast<size_t>(query_mem_desc.getEntryCount() * query_mem_desc.getRowSize()));
1033  memcpy(group_by_buffers_[0], &rows_copy[0], rows_copy.size());
1034 #else
1035  UNREACHABLE();
1036 #endif
1037 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
#define UNREACHABLE()
Definition: Logger.h:241
CUdeviceptr second
Definition: GpuMemUtils.h:61
std::vector< int64_t * > group_by_buffers_

+ Here is the call graph for this function:

void QueryMemoryInitializer::compactProjectionBuffersCpu ( const QueryMemoryDescriptor query_mem_desc,
const size_t  projection_count 
)
private

Definition at line 925 of file QueryMemoryInitializer.cpp.

References CHECK, anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryDescriptor::getEntryCount(), group_by_buffers_, and result_sets_.

927  {
928  const auto num_allocated_rows =
929  std::min(projection_count, query_mem_desc.getEntryCount());
930 
931  // copy the results from the main buffer into projection_buffer
933  query_mem_desc,
934  reinterpret_cast<int8_t*>(group_by_buffers_[0]),
935  num_allocated_rows);
936 
937  // update the entry count for the result set, and its underlying storage
938  CHECK(!result_sets_.empty());
939  result_sets_.front()->updateStorageEntryCount(num_allocated_rows);
940 }
void compact_projection_buffer_for_cpu_columnar(const QueryMemoryDescriptor &query_mem_desc, int8_t *projection_buffer, const size_t projection_count)
std::vector< int64_t * > group_by_buffers_
#define CHECK(condition)
Definition: Logger.h:197
std::vector< std::unique_ptr< ResultSet > > result_sets_

+ Here is the call graph for this function:

void QueryMemoryInitializer::compactProjectionBuffersGpu ( const QueryMemoryDescriptor query_mem_desc,
Data_Namespace::DataMgr data_mgr,
const GpuGroupByBuffers gpu_group_by_buffers,
const size_t  projection_count,
const int  device_id 
)
private

Definition at line 942 of file QueryMemoryInitializer.cpp.

References CHECK, copy_projection_buffer_from_gpu_columnar(), QueryMemoryDescriptor::getEntryCount(), group_by_buffers_, and result_sets_.

947  {
948  // store total number of allocated rows:
949  const auto num_allocated_rows =
950  std::min(projection_count, query_mem_desc.getEntryCount());
951 
952  // copy the results from the main buffer into projection_buffer
954  data_mgr,
955  gpu_group_by_buffers,
956  query_mem_desc,
957  reinterpret_cast<int8_t*>(group_by_buffers_[0]),
958  num_allocated_rows,
959  device_id);
960 
961  // update the entry count for the result set, and its underlying storage
962  CHECK(!result_sets_.empty());
963  result_sets_.front()->updateStorageEntryCount(num_allocated_rows);
964 }
std::vector< int64_t * > group_by_buffers_
#define CHECK(condition)
Definition: Logger.h:197
void copy_projection_buffer_from_gpu_columnar(Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const QueryMemoryDescriptor &query_mem_desc, int8_t *projection_buffer, const size_t projection_count, const int device_id)
std::vector< std::unique_ptr< ResultSet > > result_sets_

+ Here is the call graph for this function:

size_t QueryMemoryInitializer::computeNumberOfBuffers ( const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
const Executor executor 
) const
private

Definition at line 881 of file QueryMemoryInitializer.cpp.

References QueryMemoryDescriptor::blocksShareMemory(), and CPU.

884  {
885  return device_type == ExecutorDeviceType::CPU
886  ? 1
887  : executor->blockSize() *
888  (query_mem_desc.blocksShareMemory() ? 1 : executor->gridSize());
889 }

+ Here is the call graph for this function:

void QueryMemoryInitializer::copyGroupByBuffersFromGpu ( Data_Namespace::DataMgr data_mgr,
const QueryMemoryDescriptor query_mem_desc,
const size_t  entry_count,
const GpuGroupByBuffers gpu_group_by_buffers,
const RelAlgExecutionUnit ra_exe_unit,
const unsigned  block_size_x,
const unsigned  grid_size_x,
const int  device_id,
const bool  prepend_index_buffer 
) const

Definition at line 966 of file QueryMemoryInitializer.cpp.

References copy_group_by_buffers_from_gpu(), streaming_top_n::get_heap_size(), QueryMemoryDescriptor::getBufferSizeBytes(), QueryMemoryDescriptor::getRowSize(), GPU, group_by_buffers_, SortInfo::limit, SortInfo::offset, GpuGroupByBuffers::second, RelAlgExecutionUnit::sort_info, and QueryMemoryDescriptor::useStreamingTopN().

975  {
976  const auto thread_count = block_size_x * grid_size_x;
977 
978  size_t total_buff_size{0};
979  if (ra_exe_unit && query_mem_desc.useStreamingTopN()) {
980  const size_t n = ra_exe_unit->sort_info.offset + ra_exe_unit->sort_info.limit;
981  total_buff_size =
982  streaming_top_n::get_heap_size(query_mem_desc.getRowSize(), n, thread_count);
983  } else {
984  total_buff_size =
985  query_mem_desc.getBufferSizeBytes(ExecutorDeviceType::GPU, entry_count);
986  }
989  total_buff_size,
990  gpu_group_by_buffers.second,
991  query_mem_desc,
992  block_size_x,
993  grid_size_x,
994  device_id,
995  prepend_index_buffer);
996 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
const size_t limit
CUdeviceptr second
Definition: GpuMemUtils.h:61
const SortInfo sort_info
std::vector< int64_t * > group_by_buffers_
void copy_group_by_buffers_from_gpu(Data_Namespace::DataMgr *data_mgr, const std::vector< int64_t * > &group_by_buffers, const size_t groups_buffer_size, const CUdeviceptr group_by_dev_buffers_mem, const QueryMemoryDescriptor &query_mem_desc, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer)
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
const size_t offset

+ Here is the call graph for this function:

int64_t QueryMemoryInitializer::getAggInitValForIndex ( const size_t  index) const
inline

Definition at line 89 of file QueryMemoryInitializer.h.

References CHECK_LT, and init_agg_vals_.

89  {
90  CHECK_LT(index, init_agg_vals_.size());
91  return init_agg_vals_[index];
92  }
std::vector< int64_t > init_agg_vals_
#define CHECK_LT(x, y)
Definition: Logger.h:207
const auto QueryMemoryInitializer::getCountDistinctBitmapBytes ( ) const
inline

Definition at line 70 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_mem_bytes_.

70  {
72  }
const auto QueryMemoryInitializer::getCountDistinctBitmapPtr ( ) const
inline

Definition at line 66 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_mem_.

const auto QueryMemoryInitializer::getCountDistinctHostPtr ( ) const
inline

Definition at line 68 of file QueryMemoryInitializer.h.

References count_distinct_bitmap_host_mem_.

const auto QueryMemoryInitializer::getGroupByBuffersPtr ( )
inline

Definition at line 94 of file QueryMemoryInitializer.h.

References group_by_buffers_.

94  {
95  return reinterpret_cast<int64_t**>(group_by_buffers_.data());
96  }
std::vector< int64_t * > group_by_buffers_
const auto QueryMemoryInitializer::getGroupByBuffersSize ( ) const
inline

Definition at line 98 of file QueryMemoryInitializer.h.

References group_by_buffers_.

98 { return group_by_buffers_.size(); }
std::vector< int64_t * > group_by_buffers_
const auto QueryMemoryInitializer::getNumBuffers ( ) const
inline

Definition at line 100 of file QueryMemoryInitializer.h.

References CHECK_EQ, group_by_buffers_, and num_buffers_.

100  {
102  return num_buffers_;
103  }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
std::vector< int64_t * > group_by_buffers_
ResultSet* QueryMemoryInitializer::getResultSet ( const size_t  index) const
inline

Definition at line 74 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

74  {
75  CHECK_LT(index, result_sets_.size());
76  return result_sets_[index].get();
77  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< std::unique_ptr< ResultSet > > result_sets_
std::unique_ptr<ResultSet> QueryMemoryInitializer::getResultSetOwned ( const size_t  index)
inline

Definition at line 79 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

79  {
80  CHECK_LT(index, result_sets_.size());
81  return std::move(result_sets_[index]);
82  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< std::unique_ptr< ResultSet > > result_sets_
void QueryMemoryInitializer::initColumnarGroups ( const QueryMemoryDescriptor query_mem_desc,
int64_t *  groups_buffer,
const std::vector< int64_t > &  init_vals,
const Executor executor 
)
private

Definition at line 457 of file QueryMemoryInitializer.cpp.

References agg_col_count, align_to_int64(), CHECK, CHECK_LT, EMPTY_KEY_64, g_bigint_count, get_target_info(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getSlotCount(), groups_buffer, groups_buffer_entry_count, QueryMemoryDescriptor::hasKeylessHash(), is_distinct_target(), and Projection.

Referenced by initGroupByBuffer().

461  {
463  for (const auto target_expr : executor->plan_state_->target_exprs_) {
464  const auto agg_info = get_target_info(target_expr, g_bigint_count);
465  CHECK(!is_distinct_target(agg_info));
466  }
467  const int32_t agg_col_count = query_mem_desc.getSlotCount();
468  auto buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
469 
470  const auto groups_buffer_entry_count = query_mem_desc.getEntryCount();
471  if (!query_mem_desc.hasKeylessHash()) {
472  const size_t key_count{query_mem_desc.getGroupbyColCount()};
473  for (size_t i = 0; i < key_count; ++i) {
474  buffer_ptr = initColumnarBuffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
475  EMPTY_KEY_64,
476  groups_buffer_entry_count);
477  }
478  }
479 
481  // initializing all aggregate columns:
482  int32_t init_val_idx = 0;
483  for (int32_t i = 0; i < agg_col_count; ++i) {
484  if (query_mem_desc.getPaddedSlotWidthBytes(i) > 0) {
485  CHECK_LT(static_cast<size_t>(init_val_idx), init_vals.size());
486  switch (query_mem_desc.getPaddedSlotWidthBytes(i)) {
487  case 1:
488  buffer_ptr = initColumnarBuffer<int8_t>(
489  buffer_ptr, init_vals[init_val_idx++], groups_buffer_entry_count);
490  break;
491  case 2:
492  buffer_ptr =
493  initColumnarBuffer<int16_t>(reinterpret_cast<int16_t*>(buffer_ptr),
494  init_vals[init_val_idx++],
495  groups_buffer_entry_count);
496  break;
497  case 4:
498  buffer_ptr =
499  initColumnarBuffer<int32_t>(reinterpret_cast<int32_t*>(buffer_ptr),
500  init_vals[init_val_idx++],
501  groups_buffer_entry_count);
502  break;
503  case 8:
504  buffer_ptr =
505  initColumnarBuffer<int64_t>(reinterpret_cast<int64_t*>(buffer_ptr),
506  init_vals[init_val_idx++],
507  groups_buffer_entry_count);
508  break;
509  case 0:
510  break;
511  default:
512  CHECK(false);
513  }
514 
515  buffer_ptr = align_to_int64(buffer_ptr);
516  }
517  }
518  }
519 }
const int32_t groups_buffer_size return groups_buffer
const int64_t const uint32_t const uint32_t const uint32_t agg_col_count
#define EMPTY_KEY_64
TargetInfo get_target_info(const PointerType target_expr, const bool bigint_count)
Definition: TargetInfo.h:78
const int64_t const uint32_t groups_buffer_entry_count
size_t getGroupbyColCount() const
bool g_bigint_count
bool is_distinct_target(const TargetInfo &target_info)
Definition: TargetInfo.h:129
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType getQueryDescriptionType() const
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197
const int64_t * init_vals
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::initColumnPerRow ( const QueryMemoryDescriptor query_mem_desc,
int8_t *  row_ptr,
const size_t  bin,
const std::vector< int64_t > &  init_vals,
const std::vector< int64_t > &  bitmap_sizes 
)
private

Definition at line 521 of file QueryMemoryInitializer.cpp.

References allocateCountDistinctBitmap(), allocateCountDistinctSet(), CHECK, CHECK_EQ, CHECK_LT, QueryMemoryDescriptor::getNextColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), and QueryMemoryDescriptor::isGroupBy().

Referenced by initGroups().

525  {
526  int8_t* col_ptr = row_ptr;
527  size_t init_vec_idx = 0;
528  for (size_t col_idx = 0; col_idx < query_mem_desc.getSlotCount();
529  col_ptr += query_mem_desc.getNextColOffInBytes(col_ptr, bin, col_idx++)) {
530  const int64_t bm_sz{bitmap_sizes[col_idx]};
531  int64_t init_val{0};
532  if (!bm_sz || !query_mem_desc.isGroupBy()) {
533  if (query_mem_desc.getPaddedSlotWidthBytes(col_idx) > 0) {
534  CHECK_LT(init_vec_idx, init_vals.size());
535  init_val = init_vals[init_vec_idx++];
536  }
537  } else {
538  CHECK_EQ(static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(col_idx)),
539  sizeof(int64_t));
540  init_val =
541  bm_sz > 0 ? allocateCountDistinctBitmap(bm_sz) : allocateCountDistinctSet();
542  ++init_vec_idx;
543  }
544  switch (query_mem_desc.getPaddedSlotWidthBytes(col_idx)) {
545  case 1:
546  *col_ptr = static_cast<int8_t>(init_val);
547  break;
548  case 2:
549  *reinterpret_cast<int16_t*>(col_ptr) = (int16_t)init_val;
550  break;
551  case 4:
552  *reinterpret_cast<int32_t*>(col_ptr) = (int32_t)init_val;
553  break;
554  case 8:
555  *reinterpret_cast<int64_t*>(col_ptr) = init_val;
556  break;
557  case 0:
558  continue;
559  default:
560  CHECK(false);
561  }
562  }
563 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t getNextColOffInBytes(const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:207
#define CHECK(condition)
Definition: Logger.h:197
const int64_t * init_vals
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::initGroupByBuffer ( int64_t *  buffer,
const RelAlgExecutionUnit ra_exe_unit,
const QueryMemoryDescriptor query_mem_desc,
const ExecutorDeviceType  device_type,
const bool  output_columnar,
const Executor executor 
)
private

Definition at line 365 of file QueryMemoryInitializer.cpp.

References streaming_top_n::get_rows_offset_of_heaps(), QueryMemoryDescriptor::getEntryCount(), GPU, init_agg_vals_, initColumnarGroups(), initGroups(), QueryMemoryDescriptor::interleavedBins(), SortInfo::limit, SortInfo::offset, RelAlgExecutionUnit::sort_info, and QueryMemoryDescriptor::useStreamingTopN().

Referenced by QueryMemoryInitializer().

371  {
372  if (output_columnar) {
373  initColumnarGroups(query_mem_desc, buffer, init_agg_vals_, executor);
374  } else {
375  auto rows_ptr = buffer;
376  auto actual_entry_count = query_mem_desc.getEntryCount();
377  const auto thread_count = device_type == ExecutorDeviceType::GPU
378  ? executor->blockSize() * executor->gridSize()
379  : 1;
380  auto warp_size =
381  query_mem_desc.interleavedBins(device_type) ? executor->warpSize() : 1;
382  if (query_mem_desc.useStreamingTopN()) {
383  const auto node_count_size = thread_count * sizeof(int64_t);
384  memset(rows_ptr, 0, node_count_size);
385  const auto n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
386  const auto rows_offset = streaming_top_n::get_rows_offset_of_heaps(n, thread_count);
387  memset(rows_ptr + thread_count, -1, rows_offset - node_count_size);
388  rows_ptr += rows_offset / sizeof(int64_t);
389  actual_entry_count = n * thread_count;
390  warp_size = 1;
391  }
392  initGroups(query_mem_desc,
393  rows_ptr,
395  actual_entry_count,
396  warp_size,
397  executor);
398  }
399 }
void initGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor)
size_t get_rows_offset_of_heaps(const size_t n, const size_t thread_count)
const size_t limit
std::vector< int64_t > init_agg_vals_
const SortInfo sort_info
void initColumnarGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor)
bool interleavedBins(const ExecutorDeviceType) const
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::initGroups ( const QueryMemoryDescriptor query_mem_desc,
int64_t *  groups_buffer,
const std::vector< int64_t > &  init_vals,
const int32_t  groups_buffer_entry_count,
const size_t  warp_size,
const Executor executor 
)
private

Definition at line 401 of file QueryMemoryInitializer.cpp.

References allocateCountDistinctBuffers(), CHECK, fill_empty_key(), ResultSet::fixupQueryMemoryDescriptor(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getRowSize(), groups_buffer, groups_buffer_entry_count, QueryMemoryDescriptor::hasKeylessHash(), and initColumnPerRow().

Referenced by initGroupByBuffer().

406  {
407  const size_t key_count{query_mem_desc.getGroupbyColCount()};
408  const size_t row_size{query_mem_desc.getRowSize()};
409  const size_t col_base_off{query_mem_desc.getColOffInBytes(0)};
410 
411  auto agg_bitmap_size = allocateCountDistinctBuffers(query_mem_desc, true, executor);
412  auto buffer_ptr = reinterpret_cast<int8_t*>(groups_buffer);
413 
414  const auto query_mem_desc_fixedup =
415  ResultSet::fixupQueryMemoryDescriptor(query_mem_desc);
416 
417  if (query_mem_desc.hasKeylessHash()) {
418  CHECK(warp_size >= 1);
419  CHECK(key_count == 1 || warp_size == 1);
420  for (size_t warp_idx = 0; warp_idx < warp_size; ++warp_idx) {
421  for (size_t bin = 0; bin < static_cast<size_t>(groups_buffer_entry_count);
422  ++bin, buffer_ptr += row_size) {
423  initColumnPerRow(query_mem_desc_fixedup,
424  &buffer_ptr[col_base_off],
425  bin,
426  init_vals,
427  agg_bitmap_size);
428  }
429  }
430  return;
431  }
432 
433  for (size_t bin = 0; bin < static_cast<size_t>(groups_buffer_entry_count);
434  ++bin, buffer_ptr += row_size) {
435  fill_empty_key(buffer_ptr, key_count, query_mem_desc.getEffectiveKeyWidth());
436  initColumnPerRow(query_mem_desc_fixedup,
437  &buffer_ptr[col_base_off],
438  bin,
439  init_vals,
440  agg_bitmap_size);
441  }
442 }
const int32_t groups_buffer_size return groups_buffer
void fill_empty_key(void *key_ptr, const size_t key_count, const size_t key_width)
std::vector< int64_t > allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const bool deferred, const Executor *executor)
size_t getEffectiveKeyWidth() const
const int64_t const uint32_t groups_buffer_entry_count
void initColumnPerRow(const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const size_t bin, const std::vector< int64_t > &init_vals, const std::vector< int64_t > &bitmap_sizes)
size_t getGroupbyColCount() const
static QueryMemoryDescriptor fixupQueryMemoryDescriptor(const QueryMemoryDescriptor &)
Definition: ResultSet.cpp:509
#define CHECK(condition)
Definition: Logger.h:197
const int64_t * init_vals
size_t getColOffInBytes(const size_t col_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryInitializer::resetResultSet ( const size_t  index)
inline

Definition at line 84 of file QueryMemoryInitializer.h.

References CHECK_LT, and result_sets_.

84  {
85  CHECK_LT(index, result_sets_.size());
86  result_sets_[index].reset();
87  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< std::unique_ptr< ResultSet > > result_sets_

Friends And Related Function Documentation

friend class Executor
friend

Definition at line 228 of file QueryMemoryInitializer.h.

friend class QueryExecutionContext
friend

Definition at line 229 of file QueryMemoryInitializer.h.

Member Data Documentation

int8_t* QueryMemoryInitializer::count_distinct_bitmap_crt_ptr_
private
int8_t* QueryMemoryInitializer::count_distinct_bitmap_host_mem_
private
CUdeviceptr QueryMemoryInitializer::count_distinct_bitmap_mem_
private
size_t QueryMemoryInitializer::count_distinct_bitmap_mem_bytes_
private
DeviceAllocator* QueryMemoryInitializer::device_allocator_ {nullptr}
private

Definition at line 225 of file QueryMemoryInitializer.h.

Referenced by allocateCountDistinctGpuMem().

std::vector<int64_t> QueryMemoryInitializer::init_agg_vals_
private
const size_t QueryMemoryInitializer::num_buffers_
private
const int64_t QueryMemoryInitializer::num_rows_
private

Definition at line 211 of file QueryMemoryInitializer.h.

std::vector<std::unique_ptr<ResultSet> > QueryMemoryInitializer::result_sets_
private
std::shared_ptr<RowSetMemoryOwner> QueryMemoryInitializer::row_set_mem_owner_
private
std::vector<Data_Namespace::AbstractBuffer*> QueryMemoryInitializer::temporary_buffers_
private

Definition at line 226 of file QueryMemoryInitializer.h.


The documentation for this class was generated from the following files: