OmniSciDB  5ade3759e0
QueryMemoryDescriptor Class Reference

#include <QueryMemoryDescriptor.h>

+ Collaboration diagram for QueryMemoryDescriptor:

Public Member Functions

 QueryMemoryDescriptor ()
 
 QueryMemoryDescriptor (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector< int8_t > &group_col_widths, const int8_t group_col_compact_width, const std::vector< ssize_t > &target_groupby_indices, const size_t entry_count, const GroupByMemSharing sharing, const bool shared_mem_for_group_by, const CountDistinctDescriptors count_distinct_descriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort)
 
 QueryMemoryDescriptor (const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type)
 
 QueryMemoryDescriptor (const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector< int8_t > &group_col_widths)
 
 QueryMemoryDescriptor (const TResultSetBufferDescriptor &thrift_query_memory_descriptor)
 
bool operator== (const QueryMemoryDescriptor &other) const
 
std::unique_ptr< QueryExecutionContext > getQueryExecutionContext (const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const int64_t num_rows, const std::vector< std::vector< const int8_t *>> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, RenderInfo *) const
 
bool countDistinctDescriptorsLogicallyEmpty () const
 
const Executor * getExecutor () const
 
QueryDescriptionType getQueryDescriptionType () const
 
void setQueryDescriptionType (const QueryDescriptionType val)
 
bool isSingleColumnGroupByWithPerfectHash () const
 
bool hasKeylessHash () const
 
void setHasKeylessHash (const bool val)
 
bool hasInterleavedBinsOnGpu () const
 
void setHasInterleavedBinsOnGpu (const bool val)
 
int32_t getTargetIdxForKey () const
 
void setTargetIdxForKey (const int32_t val)
 
size_t groupColWidthsSize () const
 
int8_t groupColWidth (const size_t key_idx) const
 
size_t getPrependedGroupColOffInBytes (const size_t group_idx) const
 
size_t getPrependedGroupBufferSizeInBytes () const
 
const auto groupColWidthsBegin () const
 
const auto groupColWidthsEnd () const
 
void clearGroupColWidths ()
 
bool isGroupBy () const
 
void setGroupColCompactWidth (const int8_t val)
 
size_t getColCount () const
 
size_t getSlotCount () const
 
const int8_t getPaddedSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getLogicalSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getSlotIndexForSingleSlotCol (const size_t col_idx) const
 
size_t getPaddedColWidthForRange (const size_t offset, const size_t range) const
 
void useConsistentSlotWidthSize (const int8_t slot_width_size)
 
size_t getRowWidth () const
 
int8_t updateActualMinByteWidth (const int8_t actual_min_byte_width) const
 
void addColSlotInfo (const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)
 
void clearSlotInfo ()
 
void alignPaddedSlots ()
 
ssize_t getTargetGroupbyIndex (const size_t target_idx) const
 
size_t targetGroupbyIndicesSize () const
 
void clearTargetGroupbyIndices ()
 
size_t getEntryCount () const
 
void setEntryCount (const size_t val)
 
int64_t getMinVal () const
 
int64_t getMaxVal () const
 
int64_t getBucket () const
 
bool hasNulls () const
 
GroupByMemSharing getGpuMemSharing () const
 
const CountDistinctDescriptor & getCountDistinctDescriptor (const size_t idx) const
 
size_t getCountDistinctDescriptorsSize () const
 
bool sortOnGpu () const
 
bool canOutputColumnar () const
 
bool didOutputColumnar () const
 
void setOutputColumnar (const bool val)
 
bool isLogicalSizedColumnsAllowed () const
 
bool mustUseBaselineSort () const
 
bool forceFourByteFloat () const
 
void setForceFourByteFloat (const bool val)
 
size_t getGroupbyColCount () const
 
size_t getKeyCount () const
 
size_t getBufferColSlotCount () const
 
size_t getBufferSizeBytes (const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type, const size_t override_entry_count) const
 
const ColSlotContext & getColSlotContext () const
 
bool usesGetGroupValueFast () const
 
bool blocksShareMemory () const
 
bool threadsShareMemory () const
 
bool lazyInitGroups (const ExecutorDeviceType) const
 
bool interleavedBins (const ExecutorDeviceType) const
 
size_t sharedMemBytes (const ExecutorDeviceType) const
 
size_t getColOffInBytes (const size_t col_idx) const
 
size_t getColOffInBytesInNextBin (const size_t col_idx) const
 
size_t getNextColOffInBytes (const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
 
size_t getColOnlyOffInBytes (const size_t col_idx) const
 
size_t getRowSize () const
 
size_t getColsSize () const
 
size_t getWarpCount () const
 
size_t getCompactByteWidth () const
 
size_t getEffectiveKeyWidth () const
 
bool isWarpSyncRequired (const ExecutorDeviceType) const
 
std::string toString () const
 

Static Public Member Functions

static TResultSetBufferDescriptor toThrift (const QueryMemoryDescriptor &)
 
static std::unique_ptr< QueryMemoryDescriptor > init (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
static bool many_entries (const int64_t max_val, const int64_t min_val, const int64_t bucket)
 
static bool countDescriptorsLogicallyEmpty (const CountDistinctDescriptors &count_distinct_descriptors)
 
static int8_t pick_target_compact_width (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
 

Protected Member Functions

void resetGroupColWidths (const std::vector< int8_t > &new_group_col_widths)
 

Private Member Functions

size_t getTotalBytesOfColumnarBuffers () const
 
size_t getTotalBytesOfColumnarBuffers (const size_t num_entries_per_column) const
 
size_t getTotalBytesOfColumnarProjections (const size_t projection_count) const
 

Private Attributes

const Executor * executor_
 
bool allow_multifrag_
 
QueryDescriptionType query_desc_type_
 
bool keyless_hash_
 
bool interleaved_bins_on_gpu_
 
int32_t idx_target_as_key_
 
std::vector< int8_t > group_col_widths_
 
int8_t group_col_compact_width_
 
std::vector< ssize_t > target_groupby_indices_
 
size_t entry_count_
 
int64_t min_val_
 
int64_t max_val_
 
int64_t bucket_
 
bool has_nulls_
 
GroupByMemSharing sharing_
 
CountDistinctDescriptors count_distinct_descriptors_
 
bool sort_on_gpu_
 
bool output_columnar_
 
bool render_output_
 
bool must_use_baseline_sort_
 
bool force_4byte_float_
 
ColSlotContext col_slot_context_
 

Friends

class ResultSet
 
class QueryExecutionContext
 
template<typename META_CLASS_TYPE >
class AggregateReductionEgress
 

Detailed Description

Definition at line 66 of file QueryMemoryDescriptor.h.

Constructor & Destructor Documentation

◆ QueryMemoryDescriptor() [1/5]

QueryMemoryDescriptor::QueryMemoryDescriptor ( )

Definition at line 466 of file QueryMemoryDescriptor.cpp.

467  : executor_(nullptr)
468  , allow_multifrag_(false)
470  , keyless_hash_(false)
471  , interleaved_bins_on_gpu_(false)
472  , idx_target_as_key_(0)
474  , entry_count_(0)
475  , min_val_(0)
476  , max_val_(0)
477  , bucket_(0)
478  , has_nulls_(false)
480  , sort_on_gpu_(false)
481  , output_columnar_(false)
482  , render_output_(false)
483  , must_use_baseline_sort_(false)
484  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_

◆ QueryMemoryDescriptor() [2/5]

QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const bool  allow_multifrag,
const bool  keyless_hash,
const bool  interleaved_bins_on_gpu,
const int32_t  idx_target_as_key,
const ColRangeInfo &  col_range_info,
const ColSlotContext &  col_slot_context,
const std::vector< int8_t > &  group_col_widths,
const int8_t  group_col_compact_width,
const std::vector< ssize_t > &  target_groupby_indices,
const size_t  entry_count,
const GroupByMemSharing  sharing,
const bool  shared_mem_for_group_by,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  sort_on_gpu_hint,
const bool  output_columnar,
const bool  render_output,
const bool  must_use_baseline_sort 
)

Definition at line 368 of file QueryMemoryDescriptor.cpp.

References canOutputColumnar(), CHECK, col_slot_context_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), getRowSize(), GroupByBaselineHash, GroupByPerfectHash, interleaved_bins_on_gpu_, isLogicalSizedColumnsAllowed(), keyless_hash_, NonGroupedAggregate, output_columnar_, Projection, query_desc_type_, ColSlotContext::setAllSlotsPaddedSizeToLogicalSize(), ColSlotContext::setAllUnsetSlotsPaddedSize(), SharedForKeylessOneColumnKnownRange, sharing_, sort_on_gpu_, RelAlgExecutionUnit::use_bump_allocator, and ColSlotContext::validate().

389  : executor_(executor)
390  , allow_multifrag_(allow_multifrag)
391  , query_desc_type_(col_range_info.hash_type_)
392  , keyless_hash_(keyless_hash)
393  , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
394  , idx_target_as_key_(idx_target_as_key)
395  , group_col_widths_(group_col_widths)
396  , group_col_compact_width_(group_col_compact_width)
397  , target_groupby_indices_(target_groupby_indices)
398  , entry_count_(entry_count)
399  , min_val_(col_range_info.min)
400  , max_val_(col_range_info.max)
401  , bucket_(col_range_info.bucket)
402  , has_nulls_(col_range_info.has_nulls)
403  , sharing_(sharing)
404  , count_distinct_descriptors_(count_distinct_descriptors)
405  , output_columnar_(false)
406  , render_output_(render_output)
407  , must_use_baseline_sort_(must_use_baseline_sort)
408  , force_4byte_float_(false)
409  , col_slot_context_(col_slot_context) {
412 
413  // TODO(Saman): should remove this after implementing shared memory path
414  // completely through codegen. We should not use the current shared memory path if
415  // more than 8 bytes per group is required
417  shared_mem_for_group_by && (getRowSize() <= sizeof(int64_t))) {
418  // TODO(adb / saman): Move this into a different enum so we can remove
419  // GroupByMemSharing
421  interleaved_bins_on_gpu_ = false;
422  }
423 
424  // Note that output_columnar_ currently defaults to false to avoid issues with
425  // getRowSize above. If output columnar is enabled then shared_mem_for_group_by is not,
426  // and the above condition would never be true.
427 
428  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
429 
430  if (sort_on_gpu_) {
431  CHECK(!ra_exe_unit.use_bump_allocator);
432  output_columnar_ = true;
433  } else {
434  switch (query_desc_type_) {
436  output_columnar_ = output_columnar_hint;
437  break;
442  break;
444  output_columnar_ = output_columnar_hint;
445  break;
450  break;
451  default:
452  output_columnar_ = false;
453  break;
454  }
455  }
456 
458  // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
459  // sizes
460  CHECK(!ra_exe_unit.use_bump_allocator);
463  }
464 }
QueryDescriptionType hash_type_
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
void setAllSlotsPaddedSizeToLogicalSize()
#define CHECK(condition)
Definition: Logger.h:187
std::vector< int8_t > group_col_widths_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
void setAllUnsetSlotsPaddedSize(const int8_t padded_size)
std::vector< ssize_t > target_groupby_indices_
void validate() const
+ Here is the call graph for this function:

◆ QueryMemoryDescriptor() [3/5]

QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const size_t  entry_count,
const QueryDescriptionType  query_desc_type 
)

Definition at line 486 of file QueryMemoryDescriptor.cpp.

489  : executor_(nullptr)
490  , allow_multifrag_(false)
491  , query_desc_type_(query_desc_type)
492  , keyless_hash_(false)
493  , interleaved_bins_on_gpu_(false)
494  , idx_target_as_key_(0)
496  , entry_count_(entry_count)
497  , min_val_(0)
498  , max_val_(0)
499  , bucket_(0)
500  , has_nulls_(false)
502  , sort_on_gpu_(false)
503  , output_columnar_(false)
504  , render_output_(false)
505  , must_use_baseline_sort_(false)
506  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_

◆ QueryMemoryDescriptor() [4/5]

QueryMemoryDescriptor::QueryMemoryDescriptor ( const QueryDescriptionType  query_desc_type,
const int64_t  min_val,
const int64_t  max_val,
const bool  has_nulls,
const std::vector< int8_t > &  group_col_widths 
)

Definition at line 508 of file QueryMemoryDescriptor.cpp.

513  : executor_(nullptr)
514  , allow_multifrag_(false)
515  , query_desc_type_(query_desc_type)
516  , keyless_hash_(false)
517  , interleaved_bins_on_gpu_(false)
518  , idx_target_as_key_(0)
519  , group_col_widths_(group_col_widths)
521  , entry_count_(0)
522  , min_val_(min_val)
523  , max_val_(max_val)
524  , bucket_(0)
525  , has_nulls_(false)
527  , sort_on_gpu_(false)
528  , output_columnar_(false)
529  , render_output_(false)
530  , must_use_baseline_sort_(false)
531  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_

◆ QueryMemoryDescriptor() [5/5]

QueryMemoryDescriptor::QueryMemoryDescriptor ( const TResultSetBufferDescriptor &  thrift_query_memory_descriptor)

Member Function Documentation

◆ addColSlotInfo()

void QueryMemoryDescriptor::addColSlotInfo ( const std::vector< std::tuple< int8_t, int8_t >> &  slots_for_col)

Definition at line 1079 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumn(), and col_slot_context_.

Referenced by RelAlgExecutor::executeLogicalValues(), perfect_hash_one_col_desc(), and TEST().

1080  {
1081  col_slot_context_.addColumn(slots_for_col);
1082 }
void addColumn(const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ alignPaddedSlots()

void QueryMemoryDescriptor::alignPaddedSlots ( )

Definition at line 1088 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::alignPaddedSlots(), col_slot_context_, and sortOnGpu().

1088  {
1090 }
void alignPaddedSlots(const bool sort_on_gpu)
+ Here is the call graph for this function:

◆ blocksShareMemory()

bool QueryMemoryDescriptor::blocksShareMemory ( ) const

Definition at line 984 of file QueryMemoryDescriptor.cpp.

References bucket_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, g_cluster, getGroupbyColCount(), GPU, GroupByBaselineHash, GroupByPerfectHash, many_entries(), max_val_, min_val_, Projection, query_desc_type_, render_output_, and sharedMemBytes().

Referenced by canOutputColumnar(), QueryMemoryInitializer::computeNumberOfBuffers(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), and toString().

984  {
985  if (g_cluster) {
986  return true;
987  }
989  return true;
990  }
991  if (executor_->isCPUOnly() || render_output_ ||
995  getGroupbyColCount() > 1)) {
996  return true;
997  }
1001 }
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
size_t sharedMemBytes(const ExecutorDeviceType) const
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
bool g_cluster
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ canOutputColumnar()

bool QueryMemoryDescriptor::canOutputColumnar ( ) const

Definition at line 1092 of file QueryMemoryDescriptor.cpp.

References blocksShareMemory(), count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, interleavedBins(), threadsShareMemory(), and usesGetGroupValueFast().

Referenced by get_heap_key_slot_index(), and QueryMemoryDescriptor().

1092  {
1096 }
bool interleavedBins(const ExecutorDeviceType) const
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ clearGroupColWidths()

void QueryMemoryDescriptor::clearGroupColWidths ( )
inline

Definition at line 191 of file QueryMemoryDescriptor.h.

191 { group_col_widths_.clear(); }
std::vector< int8_t > group_col_widths_

◆ clearSlotInfo()

void QueryMemoryDescriptor::clearSlotInfo ( )

Definition at line 1084 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::clear(), and col_slot_context_.

1084  {
1086 }
+ Here is the call graph for this function:

◆ clearTargetGroupbyIndices()

void QueryMemoryDescriptor::clearTargetGroupbyIndices ( )
inline

Definition at line 229 of file QueryMemoryDescriptor.h.

229 { target_groupby_indices_.clear(); }
std::vector< ssize_t > target_groupby_indices_

◆ countDescriptorsLogicallyEmpty()

static bool QueryMemoryDescriptor::countDescriptorsLogicallyEmpty ( const CountDistinctDescriptors &  count_distinct_descriptors)
inline static

Definition at line 145 of file QueryMemoryDescriptor.h.

References Invalid.

Referenced by blocksShareMemory(), canOutputColumnar(), init(), lazyInitGroups(), and QueryMemoryDescriptor().

146  {
147  return std::all_of(count_distinct_descriptors.begin(),
148  count_distinct_descriptors.end(),
149  [](const CountDistinctDescriptor& desc) {
150  return desc.impl_type_ == CountDistinctImplType::Invalid;
151  });
152  }
+ Here is the caller graph for this function:

◆ countDistinctDescriptorsLogicallyEmpty()

bool QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty ( ) const
inline

Definition at line 154 of file QueryMemoryDescriptor.h.

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem().

154  {
156  }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
+ Here is the caller graph for this function:

◆ didOutputColumnar()

bool QueryMemoryDescriptor::didOutputColumnar ( ) const
inline

Definition at line 252 of file QueryMemoryDescriptor.h.

Referenced by QueryMemoryInitializer::allocateCountDistinctSet(), TargetExprCodegen::codegen(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), GroupByAndAggregate::codegenWindowRowPointer(), copy_projection_buffer_from_gpu_columnar(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), ResultSetStorage::copyKeyColWise(), ResultSet::createComparator(), ResultSet::didOutputColumnar(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), fill_storage_buffer(), fill_storage_buffer_baseline_colwise(), fill_storage_buffer_perfect_hash_colwise(), fill_storage_buffer_perfect_hash_rowwise(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), ResultSet::fixupQueryMemoryDescriptor(), get_cols_ptr(), ResultSet::getTargetValueFromBufferColwise(), ResultSetStorage::initializeBaselineValueSlots(), ResultSet::initializeStorage(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSet::isDirectColumnarConversionPossible(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSet::makeGeoTargetValue(), ResultSetStorage::moveOneEntryToBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetStorage::reduceOneEntryNoCollisionsRowWise(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

+ Here is the caller graph for this function:

◆ forceFourByteFloat()

bool QueryMemoryDescriptor::forceFourByteFloat ( ) const
inline

Definition at line 261 of file QueryMemoryDescriptor.h.

Referenced by ResultSet::makeTargetValue().

+ Here is the caller graph for this function:

◆ getBucket()

int64_t QueryMemoryDescriptor::getBucket ( ) const
inline

Definition at line 236 of file QueryMemoryDescriptor.h.

Referenced by GroupByAndAggregate::codegenGroupBy(), and GroupByAndAggregate::codegenSingleColumnPerfectHash().

236 { return bucket_; }
+ Here is the caller graph for this function:

◆ getBufferColSlotCount()

size_t QueryMemoryDescriptor::getBufferColSlotCount ( ) const

Definition at line 964 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and target_groupby_indices_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::advance_col_buff_to_slot(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), and ResultSetStorage::fillOneEntryRowWise().

964  {
965  size_t total_slot_count = col_slot_context_.getSlotCount();
966 
967  if (target_groupby_indices_.empty()) {
968  return total_slot_count;
969  }
970  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
972  [](const ssize_t i) { return i >= 0; });
973 }
size_t getSlotCount() const
std::vector< ssize_t > target_groupby_indices_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getBufferSizeBytes() [1/3]

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  thread_count,
const ExecutorDeviceType  device_type 
) const

Definition at line 895 of file QueryMemoryDescriptor.cpp.

References entry_count_, streaming_top_n::get_heap_size(), getRowSize(), SortInfo::limit, SortInfo::offset, output_columnar_, RelAlgExecutionUnit::sort_info, and use_streaming_top_n().

Referenced by QueryMemoryInitializer::allocateCountDistinctSet(), ResultSet::allocateStorage(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), get_heap_key_slot_index(), getBufferSizeBytes(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSet::radixSortOnGpu(), and TEST().

898  {
899  if (use_streaming_top_n(ra_exe_unit, output_columnar_)) {
900  const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
901  return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
902  }
903  return getBufferSizeBytes(device_type, entry_count_);
904 }
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
const size_t limit
const SortInfo sort_info
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
const size_t offset
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getBufferSizeBytes() [2/3]

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type) const

Definition at line 941 of file QueryMemoryDescriptor.cpp.

References entry_count_, and getBufferSizeBytes().

942  {
943  return getBufferSizeBytes(device_type, entry_count_);
944 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
+ Here is the call graph for this function:

◆ getBufferSizeBytes() [3/3]

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type,
const size_t  entry_count 
) const

Returns total amount of output buffer memory for each device (CPU/GPU)

Columnar: if projection, it returns index buffer + columnar buffer (all non-lazy columns); if group by, it returns the amount required for each group column (assumes 64-bit per group) + columnar buffer (all involved agg columns).

Row-wise: returns required memory per row multiplied by number of entries

Definition at line 917 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_GE, executor_, getColsSize(), getRowSize(), getTotalBytesOfColumnarBuffers(), group_col_widths_, interleavedBins(), keyless_hash_, output_columnar_, Projection, and query_desc_type_.

918  {
920  CHECK_GE(group_col_widths_.size(), size_t(1));
921  auto row_bytes = align_to_int64(getColsSize());
922 
923  return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
924  row_bytes;
925  }
926 
927  constexpr size_t row_index_width = sizeof(int64_t);
928  size_t total_bytes{0};
929  if (output_columnar_) {
931  ? row_index_width * entry_count
932  : sizeof(int64_t) * group_col_widths_.size() * entry_count) +
934  } else {
935  total_bytes = getRowSize() * entry_count;
936  }
937 
938  return total_bytes;
939 }
#define CHECK_GE(x, y)
Definition: Logger.h:200
size_t getTotalBytesOfColumnarBuffers() const
bool interleavedBins(const ExecutorDeviceType) const
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
+ Here is the call graph for this function:

◆ getColCount()

size_t QueryMemoryDescriptor::getColCount ( ) const

Definition at line 1041 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColCount().

1041  {
1042  return col_slot_context_.getColCount();
1043 }
size_t getColCount() const
+ Here is the call graph for this function:

◆ getColOffInBytes()

size_t QueryMemoryDescriptor::getColOffInBytes ( const size_t  col_idx) const

Definition at line 792 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_EQ, entry_count_, getColOnlyOffInBytes(), getEffectiveKeyWidth(), getPaddedSlotWidthBytes(), getPrependedGroupBufferSizeInBytes(), getWarpCount(), group_col_widths_, keyless_hash_, and output_columnar_.

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenOutputSlot(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), get_cols_ptr(), get_heap_key_slot_index(), QueryExecutionContext::groupBufferToDeinterleavedResults(), QueryMemoryInitializer::initGroups(), inplace_sort_gpu(), and ResultSet::radixSortOnCpu().

792  {
793  const auto warp_count = getWarpCount();
794  if (output_columnar_) {
795  CHECK_EQ(size_t(1), warp_count);
796  size_t offset{0};
797  if (!keyless_hash_) {
799  }
800  for (size_t index = 0; index < col_idx; ++index) {
802  }
803  return offset;
804  }
805 
806  size_t offset{0};
807  if (keyless_hash_) {
808  CHECK_EQ(size_t(1), group_col_widths_.size());
809  } else {
810  offset += group_col_widths_.size() * getEffectiveKeyWidth();
811  offset = align_to_int64(offset);
812  }
813  offset += getColOnlyOffInBytes(col_idx);
814  return offset;
815 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
size_t getPrependedGroupBufferSizeInBytes() const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
size_t getColOnlyOffInBytes(const size_t col_idx) const
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
size_t getEffectiveKeyWidth() const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getColOffInBytesInNextBin()

size_t QueryMemoryDescriptor::getColOffInBytesInNextBin ( const size_t  col_idx) const

Definition at line 850 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, getPaddedSlotWidthBytes(), getRowSize(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryExecutionContext::groupBufferToDeinterleavedResults().

850  {
851  auto warp_count = getWarpCount();
852  if (output_columnar_) {
853  CHECK_EQ(size_t(1), group_col_widths_.size());
854  CHECK_EQ(size_t(1), warp_count);
855  return getPaddedSlotWidthBytes(col_idx);
856  }
857 
858  return warp_count * getRowSize();
859 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
std::vector< int8_t > group_col_widths_
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getColOnlyOffInBytes()

size_t QueryMemoryDescriptor::getColOnlyOffInBytes ( const size_t  col_idx) const

Definition at line 779 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColOnlyOffInBytes().

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), getColOffInBytes(), and ResultSetStorage::reduceSingleRow().

779  {
780  return col_slot_context_.getColOnlyOffInBytes(col_idx);
781 }
size_t getColOnlyOffInBytes(const size_t slot_idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getColSlotContext()

const ColSlotContext& QueryMemoryDescriptor::getColSlotContext ( ) const
inline

Definition at line 276 of file QueryMemoryDescriptor.h.

Referenced by ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetStorage::reduceOneEntryNoCollisionsRowWise(), and ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions().

276 { return col_slot_context_; }
+ Here is the caller graph for this function:

◆ getColsSize()

size_t QueryMemoryDescriptor::getColsSize ( ) const

Definition at line 719 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsAlignedPaddedSize().

Referenced by getBufferSizeBytes(), getRowSize(), and QueryExecutionContext::launchCpuCode().

719  {
721 }
size_t getAllSlotsAlignedPaddedSize() const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getCompactByteWidth()

size_t QueryMemoryDescriptor::getCompactByteWidth ( ) const

Definition at line 740 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getCompactByteWidth().

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), and anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec().

740  {
742 }
size_t getCompactByteWidth() const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getCountDistinctDescriptor()

const CountDistinctDescriptor& QueryMemoryDescriptor::getCountDistinctDescriptor ( const size_t  idx) const
inline

◆ getCountDistinctDescriptorsSize()

size_t QueryMemoryDescriptor::getCountDistinctDescriptorsSize ( ) const
inline

Definition at line 245 of file QueryMemoryDescriptor.h.

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::reduceOneCountDistinctSlot(), and ResultSetStorage::reduceOneCountDistinctSlot().

245  {
246  return count_distinct_descriptors_.size();
247  }
CountDistinctDescriptors count_distinct_descriptors_
+ Here is the caller graph for this function:

◆ getEffectiveKeyWidth()

◆ getEntryCount()

size_t QueryMemoryDescriptor::getEntryCount ( ) const
inline

Definition at line 231 of file QueryMemoryDescriptor.h.

Referenced by advance_to_next_columnar_key_buff(), advance_to_next_columnar_target_buff(), QueryMemoryInitializer::allocateCountDistinctGpuMem(), ResultSet::append(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), Executor::dispatchFragments(), ResultSet::entryCount(), fill_storage_buffer_baseline_colwise(), fill_storage_buffer_baseline_rowwise(), anonymous_namespace{ResultSetBaselineRadixSortTest.cpp}::fill_storage_buffer_baseline_sort_fp(), anonymous_namespace{ResultSetBaselineRadixSortTest.cpp}::fill_storage_buffer_baseline_sort_int(), fill_storage_buffer_perfect_hash_colwise(), fill_storage_buffer_perfect_hash_rowwise(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), anonymous_namespace{ResultSetReduction.cpp}::get_matching_group_value_reduction(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSet::getTargetValueFromBufferColwise(), QueryMemoryInitializer::initColumnarGroups(), ResultSetStorage::initializeBaselineValueSlots(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), ResultSet::initPermutationBuffer(), inplace_sort_gpu(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), ResultSetStorage::moveOneEntryToBuffer(), query_group_by_template_impl(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSet::radixSortOnCpu(), 
ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), anonymous_namespace{ResultSetTest.cpp}::ResultSetEmulator::ResultSetEmulator(), ResultSetStorage::rewriteAggregateBufferOffsets(), ResultSet::sort(), anonymous_namespace{ResultSetTest.cpp}::test_iterate(), and ResultSetReductionJIT::useInterpreter().

231 { return entry_count_; }

◆ getExecutor()

const Executor* QueryMemoryDescriptor::getExecutor ( ) const
inline

Definition at line 163 of file QueryMemoryDescriptor.h.

Referenced by anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::getVarlenOrderEntry(), ResultSet::makeGeoTargetValue(), and ResultSet::makeVarlenTargetValue().

163 { return executor_; }
+ Here is the caller graph for this function:

◆ getGpuMemSharing()

GroupByMemSharing QueryMemoryDescriptor::getGpuMemSharing ( ) const
inline

Definition at line 239 of file QueryMemoryDescriptor.h.

Referenced by TargetExprCodegen::codegen(), and query_group_by_template_impl().

239 { return sharing_; }
+ Here is the caller graph for this function:

◆ getGroupbyColCount()

◆ getKeyCount()

size_t QueryMemoryDescriptor::getKeyCount ( ) const
inline

◆ getLogicalSlotWidthBytes()

const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1053 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::logical_size.

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), TargetExprCodegen::codegen(), fill_one_entry_no_collisions(), and ResultSet::getTargetValueFromBufferRowwise().

1054  {
1055  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
1056 }
int8_t logical_size
const SlotSize & getSlotInfo(const size_t slot_idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getMaxVal()

int64_t QueryMemoryDescriptor::getMaxVal ( ) const
inline

Definition at line 235 of file QueryMemoryDescriptor.h.

Referenced by GroupByAndAggregate::codegenGroupBy(), and perfect_hash_one_col_desc().

235 { return max_val_; }
+ Here is the caller graph for this function:

◆ getMinVal()

int64_t QueryMemoryDescriptor::getMinVal ( ) const
inline

Definition at line 234 of file QueryMemoryDescriptor.h.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash(), and perfect_hash_one_col_desc().

234 { return min_val_; }
+ Here is the caller graph for this function:

◆ getNextColOffInBytes()

size_t QueryMemoryDescriptor::getNextColOffInBytes ( const int8_t *  col_ptr,
const size_t  bin,
const size_t  col_idx 
) const

Definition at line 861 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, entry_count_, getPaddedSlotWidthBytes(), getSlotCount(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryMemoryInitializer::initColumnPerRow().

863  {
865  size_t offset{0};
866  auto warp_count = getWarpCount();
867  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
868  const auto total_slot_count = getSlotCount();
869  if (col_idx + 1 == total_slot_count) {
870  if (output_columnar_) {
871  return (entry_count_ - bin) * chosen_bytes;
872  } else {
873  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
874  }
875  }
876 
877  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
878  if (output_columnar_) {
879  CHECK_EQ(size_t(1), group_col_widths_.size());
880  CHECK_EQ(size_t(1), warp_count);
881 
882  offset = align_to_int64(entry_count_ * chosen_bytes);
883 
884  offset += bin * (next_chosen_bytes - chosen_bytes);
885  return offset;
886  }
887 
888  if (next_chosen_bytes == sizeof(int64_t)) {
889  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
890  } else {
891  return chosen_bytes;
892  }
893 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define CHECK(condition)
Definition: Logger.h:187
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getPaddedColWidthForRange()

size_t QueryMemoryDescriptor::getPaddedColWidthForRange ( const size_t  offset,
const size_t  range 
) const
inline

Definition at line 205 of file QueryMemoryDescriptor.h.

Referenced by get_byteoff_of_slot(), and ResultSet::makeGeoTargetValue().

205  {
206  size_t ret = 0;
207  for (size_t i = offset; i < offset + range; i++) {
208  ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
209  }
210  return ret;
211  }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
+ Here is the caller graph for this function:

◆ getPaddedSlotWidthBytes()

const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1049 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::padded_size.

Referenced by advance_target_ptr_row_wise(), advance_to_next_columnar_target_buff(), QueryMemoryInitializer::allocateCountDistinctSet(), TargetExprCodegen::codegen(), GroupByAndAggregate::codegenOutputSlot(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), fill_storage_buffer_perfect_hash_colwise(), get_heap_key_slot_index(), get_width_for_slot(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), ResultSet::getPaddedSlotWidthBytes(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnPerRow(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSet::makeGeoTargetValue(), AggregateReductionEgress< META_TYPE_CLASS >::operator()(), AggregateReductionEgress< Experimental::MetaTypeClass< Experimental::Geometry > >::operator()(), ResultSet::radixSortOnCpu(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneAggregateSlot(), ResultSetStorage::reduceOneEntryNoCollisionsRowWise(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

1049  {
1050  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
1051 }
const SlotSize & getSlotInfo(const size_t slot_idx) const
int8_t padded_size
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getPrependedGroupBufferSizeInBytes()

size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes ( ) const

Definition at line 839 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by getColOffInBytes().

839  {
841  size_t buffer_size{0};
842  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
843  buffer_size += align_to_int64(
844  std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
845  getEntryCount());
846  }
847  return buffer_size;
848 }
int8_t groupColWidth(const size_t key_idx) const
#define CHECK(condition)
Definition: Logger.h:187
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getPrependedGroupColOffInBytes()

size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes ( const size_t  group_idx) const

Definition at line 821 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by ResultSetStorage::copyKeyColWise(), and ResultSetStorage::isEmptyEntryColumnar().

822  {
824  CHECK(group_idx < getGroupbyColCount());
825  size_t offset{0};
826  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
827  // TODO(Saman): relax that int64_bit part immediately
828  offset += align_to_int64(
829  std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
830  getEntryCount());
831  }
832  return offset;
833 }
int8_t groupColWidth(const size_t key_idx) const
#define CHECK(condition)
Definition: Logger.h:187
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getQueryDescriptionType()

QueryDescriptionType QueryMemoryDescriptor::getQueryDescriptionType ( ) const
inline

Definition at line 165 of file QueryMemoryDescriptor.h.

Referenced by ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), Executor::collectAllDeviceResults(), copy_projection_buffer_from_gpu_columnar(), Executor::dispatchFragments(), fill_storage_buffer(), ResultSet::getQueryDescriptionType(), init_agg_val_vec(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSet::isDirectColumnarConversionPossible(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), ResultSetStorage::reduce(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx(), Executor::ExecutionDispatch::run(), Executor::ExecutionDispatch::runImpl(), target_exprs_to_infos(), anonymous_namespace{ResultSetTest.cpp}::test_reduce_random_groups(), and ResultSet::updateStorageEntryCount().

165 { return query_desc_type_; }
QueryDescriptionType query_desc_type_
+ Here is the caller graph for this function:

◆ getQueryExecutionContext()

std::unique_ptr< QueryExecutionContext > QueryMemoryDescriptor::getQueryExecutionContext ( const RelAlgExecutionUnit &  ra_exe_unit,
const Executor *  executor,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const int  device_id,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t *>> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const bool  output_columnar,
const bool  sort_on_gpu,
RenderInfo *  render_info 
) const

Definition at line 600 of file QueryMemoryDescriptor.cpp.

References QueryExecutionContext.

Referenced by Executor::ExecutionDispatch::runImpl().

612  {
613  if (frag_offsets.empty()) {
614  return nullptr;
615  }
616  return std::unique_ptr<QueryExecutionContext>(
617  new QueryExecutionContext(ra_exe_unit,
618  *this,
619  executor,
620  device_type,
621  dispatch_mode,
622  device_id,
623  num_rows,
624  col_buffers,
625  frag_offsets,
626  row_set_mem_owner,
627  output_columnar,
628  sort_on_gpu,
629  render_info));
630 }
const int8_t const int64_t * num_rows
void sort_on_gpu(int64_t *val_buff, int32_t *key_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
+ Here is the caller graph for this function:

◆ getRowSize()

size_t QueryMemoryDescriptor::getRowSize ( ) const

Definition at line 723 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, getColsSize(), getEffectiveKeyWidth(), group_col_widths_, keyless_hash_, and output_columnar_.

Referenced by QueryMemoryInitializer::allocateCountDistinctSet(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), get_heap_key_slot_index(), getBufferSizeBytes(), getColOffInBytesInNextBin(), QueryMemoryInitializer::initGroups(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduceSingleRow(), and sharedMemBytes().

723  {
725  size_t total_bytes{0};
726  if (keyless_hash_) {
727  CHECK_EQ(size_t(1), group_col_widths_.size());
728  } else {
729  total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
730  total_bytes = align_to_int64(total_bytes);
731  }
732  total_bytes += getColsSize();
733  return align_to_int64(total_bytes);
734 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define CHECK(condition)
Definition: Logger.h:187
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
size_t getEffectiveKeyWidth() const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getRowWidth()

size_t QueryMemoryDescriptor::getRowWidth ( ) const

Definition at line 1069 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsPaddedSize().

Referenced by get_row_bytes().

1069  {
1070  // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
1072 }
size_t getAllSlotsPaddedSize() const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getSlotCount()

◆ getSlotIndexForSingleSlotCol()

const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol ( const size_t  col_idx) const

Definition at line 1058 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, col_slot_context_, and ColSlotContext::getSlotsForCol().

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers().

1059  {
1060  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
1061  CHECK_EQ(col_slots.size(), size_t(1));
1062  return col_slots.front();
1063 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const std::vector< size_t > & getSlotsForCol(const size_t col_idx) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getTargetGroupbyIndex()

ssize_t QueryMemoryDescriptor::getTargetGroupbyIndex ( const size_t  target_idx) const
inline

Definition at line 224 of file QueryMemoryDescriptor.h.

References CHECK_LT.

Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntryNoCollisionsRowWise(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), and ResultSetStorage::reduceOneSlot().

224  {
225  CHECK_LT(target_idx, target_groupby_indices_.size());
226  return target_groupby_indices_[target_idx];
227  }
#define CHECK_LT(x, y)
Definition: Logger.h:197
std::vector< ssize_t > target_groupby_indices_
+ Here is the caller graph for this function:

◆ getTargetIdxForKey()

int32_t QueryMemoryDescriptor::getTargetIdxForKey ( ) const
inline

Definition at line 178 of file QueryMemoryDescriptor.h.

Referenced by ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), query_group_by_template_impl(), ResultSetStorage::reduceSingleRow(), and toString().

+ Here is the caller graph for this function:

◆ getTotalBytesOfColumnarBuffers() [1/2]

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( ) const
private

Returns the maximum total number of bytes (including required padding) needed to store all non-lazy columns' results for columnar cases.

Definition at line 749 of file QueryMemoryDescriptor.cpp.

References CHECK, col_slot_context_, entry_count_, ColSlotContext::getTotalBytesOfColumnarBuffers(), and output_columnar_.

Referenced by getBufferSizeBytes(), and getTotalBytesOfColumnarProjections().

749  {
752 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ getTotalBytesOfColumnarBuffers() [2/2]

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( const size_t  num_entries_per_column) const
private

This is a helper function that returns the total number of bytes (including required padding) needed to store all non-lazy columns' results for columnar cases.

Definition at line 758 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getTotalBytesOfColumnarBuffers().

759  {
760  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
761 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const
+ Here is the call graph for this function:

◆ getTotalBytesOfColumnarProjections()

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections ( const size_t  projection_count) const
private

Returns the effective total number of bytes from columnar projections, which includes: 1) the total number of bytes used to store all non-lazy columns, and 2) the total number of bytes used to store row indices (for lazy fetches, etc.).

NOTE: this function does not represent the sizes of the buffers dedicated to the results, but rather the memory required to fill all valid results into a new compact buffer (with no holes in it).

Definition at line 772 of file QueryMemoryDescriptor.cpp.

References getTotalBytesOfColumnarBuffers().

773  {
774  constexpr size_t row_index_width = sizeof(int64_t);
775  return getTotalBytesOfColumnarBuffers(projection_count) +
776  row_index_width * projection_count;
777 }
size_t getTotalBytesOfColumnarBuffers() const
+ Here is the call graph for this function:

◆ getWarpCount()

size_t QueryMemoryDescriptor::getWarpCount ( ) const

Definition at line 736 of file QueryMemoryDescriptor.cpp.

References executor_, and interleaved_bins_on_gpu_.

Referenced by getColOffInBytes(), getColOffInBytesInNextBin(), and getNextColOffInBytes().

736  {
737  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
738 }
+ Here is the caller graph for this function:

◆ groupColWidth()

int8_t QueryMemoryDescriptor::groupColWidth ( const size_t  key_idx) const
inline

Definition at line 182 of file QueryMemoryDescriptor.h.

References CHECK_LT.

Referenced by advance_to_next_columnar_key_buff(), ResultSetStorage::copyKeyColWise(), fill_storage_buffer_perfect_hash_colwise(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), and ResultSetStorage::isEmptyEntryColumnar().

182  {
183  CHECK_LT(key_idx, group_col_widths_.size());
184  return group_col_widths_[key_idx];
185  }
#define CHECK_LT(x, y)
Definition: Logger.h:197
std::vector< int8_t > group_col_widths_
+ Here is the caller graph for this function:

◆ groupColWidthsBegin()

const auto QueryMemoryDescriptor::groupColWidthsBegin ( ) const
inline

Definition at line 189 of file QueryMemoryDescriptor.h.

189 { return group_col_widths_.begin(); }
std::vector< int8_t > group_col_widths_

◆ groupColWidthsEnd()

const auto QueryMemoryDescriptor::groupColWidthsEnd ( ) const
inline

Definition at line 190 of file QueryMemoryDescriptor.h.

190 { return group_col_widths_.end(); }
std::vector< int8_t > group_col_widths_

◆ groupColWidthsSize()

size_t QueryMemoryDescriptor::groupColWidthsSize ( ) const
inline

Definition at line 181 of file QueryMemoryDescriptor.h.

Referenced by advance_to_next_columnar_key_buff(), QueryMemoryInitializer::allocateCountDistinctSet(), GroupByAndAggregate::codegenGroupBy(), get_key_bytes_rowwise(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initGroups(), and ResultSetStorage::isEmptyEntryColumnar().

181 { return group_col_widths_.size(); }
std::vector< int8_t > group_col_widths_
+ Here is the caller graph for this function:

◆ hasInterleavedBinsOnGpu()

bool QueryMemoryDescriptor::hasInterleavedBinsOnGpu ( ) const
inline

Definition at line 175 of file QueryMemoryDescriptor.h.

◆ hasKeylessHash()

◆ hasNulls()

bool QueryMemoryDescriptor::hasNulls ( ) const
inline

Definition at line 238 of file QueryMemoryDescriptor.h.

Referenced by GroupByAndAggregate::codegenGroupBy().

+ Here is the caller graph for this function:

◆ init()

std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptor::init ( const Executor *  executor,
const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const ColRangeInfo col_range_info,
const KeylessInfo keyless_info,
const bool  allow_multifrag,
const ExecutorDeviceType  device_type,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
const size_t  shard_count,
const size_t  max_groups_buffer_entry_count,
RenderInfo *  render_info,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
static

Definition at line 145 of file QueryMemoryDescriptor.cpp.

References ColRangeInfo::bucket, CHECK, countDescriptorsLogicallyEmpty(), Estimator, g_enable_smem_group_by, get_col_byte_widths(), GroupByAndAggregate::getBucketedCardinality(), GPU, RelAlgExecutionUnit::groupby_exprs, GroupByBaselineHash, GroupByPerfectHash, ColRangeInfo::hash_type_, RenderInfo::isPotentialInSituRender(), KeylessInfo::keyless, kSAMPLE, SortInfo::limit, many_entries(), ColRangeInfo::max, ColRangeInfo::min, NonGroupedAggregate, SortInfo::offset, anonymous_namespace{QueryMemoryDescriptor.cpp}::pick_baseline_key_width(), pick_target_compact_width(), Projection, RelAlgExecutionUnit::scan_limit, RenderInfo::setInSituDataIfUnset(), Shared, KeylessInfo::shared_mem_support, RelAlgExecutionUnit::sort_info, GroupByAndAggregate::supportedExprForGpuSharedMemUsage(), anonymous_namespace{QueryMemoryDescriptor.cpp}::target_expr_group_by_indices(), anonymous_namespace{QueryMemoryDescriptor.cpp}::target_expr_proj_indices(), RelAlgExecutionUnit::target_exprs, KeylessInfo::target_index, UNREACHABLE, RelAlgExecutionUnit::use_bump_allocator, and use_streaming_top_n().

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

160  {
161  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs, {});
162  const bool is_group_by{!group_col_widths.empty()};
163 
164  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
165 
166  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
167  ra_exe_unit, query_infos, crt_min_byte_width);
168 
169  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
170  col_slot_context.validate();
171 
172  if (!is_group_by) {
173  CHECK(!must_use_baseline_sort);
174 
175  return std::make_unique<QueryMemoryDescriptor>(
176  executor,
177  ra_exe_unit,
178  query_infos,
179  allow_multifrag,
180  false,
181  false,
182  -1,
183  ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
185  0,
186  0,
187  0,
188  false},
189  col_slot_context,
190  std::vector<int8_t>{},
191  /*group_col_compact_width*/ 0,
192  std::vector<ssize_t>{},
193  /*entry_count*/ 1,
195  false,
196  count_distinct_descriptors,
197  false,
198  output_columnar_hint,
199  render_info && render_info->isPotentialInSituRender(),
200  must_use_baseline_sort);
201  }
202 
203  size_t entry_count = 1;
204  auto actual_col_range_info = col_range_info;
205  auto sharing = GroupByMemSharing::Shared;
206  bool interleaved_bins_on_gpu = false;
207  bool keyless_hash = false;
208  bool shared_mem_for_group_by = false;
209  int8_t group_col_compact_width = 0;
210  int32_t idx_target_as_key = -1;
211  auto output_columnar = output_columnar_hint;
212  std::vector<ssize_t> target_groupby_indices;
213 
214  switch (col_range_info.hash_type_) {
216  if (render_info) {
217  render_info->setInSituDataIfUnset(false);
218  }
219 
220  if (group_col_widths.size() > 1) {
221  // col range info max contains the expected cardinality of the output
222  entry_count = static_cast<size_t>(actual_col_range_info.max);
223  actual_col_range_info.bucket = 0;
224  } else {
225  // single column perfect hash
226  idx_target_as_key = keyless_info.target_index;
227  keyless_hash =
228  (!sort_on_gpu_hint ||
230  col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
231  !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
232  entry_count = std::max(
233  GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
234  const size_t interleaved_max_threshold{512};
235 
236  size_t gpu_smem_max_threshold{0};
237  if (device_type == ExecutorDeviceType::GPU) {
238  const auto cuda_mgr = executor->getCatalog()->getDataMgr().getCudaMgr();
239  CHECK(cuda_mgr);
240  /*
241  * We only use shared memory strategy if GPU hardware provides native shared
242  *memory atomics support. From CUDA Toolkit documentation:
243  *https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
244  *Maxwell, Pascal [and Volta] provides native shared memory atomic operations
245  *for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
246  *(CAS)."
247  *
248  **/
249  if (cuda_mgr->isArchMaxwellOrLaterForAll()) {
250  // TODO(Saman): threshold should be eventually set as an optimized policy per
251  // architecture.
252  gpu_smem_max_threshold =
253  std::min((cuda_mgr->isArchVoltaForAll()) ? 4095LU : 2047LU,
254  (cuda_mgr->getMaxSharedMemoryForAll() / sizeof(int64_t) - 1));
255  }
256  }
257 
258  if (must_use_baseline_sort) {
259  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
260  ra_exe_unit.target_exprs);
261  col_slot_context =
262  ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
263  }
264 
265  const auto group_expr = ra_exe_unit.groupby_exprs.front().get();
266  shared_mem_for_group_by =
267  g_enable_smem_group_by && keyless_hash && keyless_info.shared_mem_support &&
268  (entry_count <= gpu_smem_max_threshold) &&
271  count_distinct_descriptors) &&
272  !output_columnar; // TODO(Saman): add columnar support with the new smem
273  // support.
274 
275  bool has_varlen_sample_agg = false;
276  for (const auto& target_expr : ra_exe_unit.target_exprs) {
277  if (target_expr->get_contains_agg()) {
278  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
279  CHECK(agg_expr);
280  if (agg_expr->get_aggtype() == kSAMPLE &&
281  agg_expr->get_type_info().is_varlen()) {
282  has_varlen_sample_agg = true;
283  break;
284  }
285  }
286  }
287 
288  interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
289  (entry_count <= interleaved_max_threshold) &&
290  (device_type == ExecutorDeviceType::GPU) &&
292  count_distinct_descriptors) &&
293  !output_columnar;
294  }
295  break;
296  }
298  if (render_info) {
299  render_info->setInSituDataIfUnset(false);
300  }
301  entry_count = shard_count
302  ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
303  : max_groups_buffer_entry_count;
304  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
305  ra_exe_unit.target_exprs);
306  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
307 
308  group_col_compact_width =
309  output_columnar ? 8
310  : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
311 
312  actual_col_range_info =
314  break;
315  }
317  CHECK(!must_use_baseline_sort);
318 
319  if (use_streaming_top_n(ra_exe_unit, output_columnar)) {
320  entry_count = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
321  } else {
322  if (ra_exe_unit.use_bump_allocator) {
323  output_columnar = false;
324  entry_count = 0;
325  } else {
326  entry_count = ra_exe_unit.scan_limit
327  ? static_cast<size_t>(ra_exe_unit.scan_limit)
328  : max_groups_buffer_entry_count;
329  }
330  }
331 
332  const auto catalog = executor->getCatalog();
333  CHECK(catalog);
334  target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
335  ? target_expr_proj_indices(ra_exe_unit, *catalog)
336  : std::vector<ssize_t>{};
337 
338  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
339  break;
340  }
341  default:
342  UNREACHABLE() << "Unknown query type";
343  }
344 
345  return std::make_unique<QueryMemoryDescriptor>(
346  executor,
347  ra_exe_unit,
348  query_infos,
349  allow_multifrag,
350  keyless_hash,
351  interleaved_bins_on_gpu,
352  idx_target_as_key,
353  actual_col_range_info,
354  col_slot_context,
355  group_col_widths,
356  group_col_compact_width,
357  target_groupby_indices,
358  entry_count,
359  sharing,
360  shared_mem_for_group_by,
361  count_distinct_descriptors,
362  sort_on_gpu_hint,
363  output_columnar,
364  render_info && render_info->isPotentialInSituRender(),
365  must_use_baseline_sort);
366 }
std::vector< Analyzer::Expr * > target_exprs
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
const bool shared_mem_support
bool g_enable_smem_group_by
const bool keyless
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list, const std::vector< ssize_t > &col_exprs_to_not_project)
bool setInSituDataIfUnset(const bool is_in_situ_data)
Definition: RenderInfo.cpp:89
QueryDescriptionType hash_type_
#define UNREACHABLE()
Definition: Logger.h:231
static bool supportedExprForGpuSharedMemUsage(Analyzer::Expr *expr)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::vector< ssize_t > target_expr_group_by_indices(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs, const std::vector< Analyzer::Expr *> &target_exprs)
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:55
static int8_t pick_target_compact_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
const int32_t target_index
#define CHECK(condition)
Definition: Logger.h:187
std::vector< ssize_t > target_expr_proj_indices(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &cat)
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
int8_t pick_baseline_key_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Executor *executor)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ interleavedBins()

bool QueryMemoryDescriptor::interleavedBins ( const ExecutorDeviceType  device_type) const

Definition at line 1008 of file QueryMemoryDescriptor.cpp.

References GPU, and interleaved_bins_on_gpu_.

Referenced by QueryMemoryInitializer::allocateCountDistinctSet(), canOutputColumnar(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), getBufferSizeBytes(), QueryExecutionContext::groupBufferToResults(), and QueryMemoryInitializer::QueryMemoryInitializer().

+ Here is the caller graph for this function:

◆ isGroupBy()

bool QueryMemoryDescriptor::isGroupBy ( ) const
inline

Definition at line 193 of file QueryMemoryDescriptor.h.

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec(), QueryMemoryInitializer::initColumnPerRow(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

193 { return !group_col_widths_.empty(); }
std::vector< int8_t > group_col_widths_
+ Here is the caller graph for this function:

◆ isLogicalSizedColumnsAllowed()

bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed ( ) const

Definition at line 957 of file QueryMemoryDescriptor.cpp.

References g_cluster, output_columnar_, Projection, and query_desc_type_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(), anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec(), ResultSet::makeTargetValue(), QueryMemoryDescriptor(), ResultSetStorage::reduceOneSlot(), and setOutputColumnar().

957  {
958  // In distributed mode, result sets are serialized using rowwise iterators, so we use
959  // consistent slot widths for now
960  return output_columnar_ && !g_cluster &&
961  (query_desc_type_ == QueryDescriptionType::Projection);
962 }
QueryDescriptionType query_desc_type_
bool g_cluster
+ Here is the caller graph for this function:

◆ isSingleColumnGroupByWithPerfectHash()

bool QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash ( ) const
inline

Definition at line 167 of file QueryMemoryDescriptor.h.

References GroupByPerfectHash.

Referenced by GroupByAndAggregate::codegenGroupBy(), and ResultSet::getTargetValueFromBufferRowwise().

+ Here is the caller graph for this function:

◆ isWarpSyncRequired()

bool QueryMemoryDescriptor::isWarpSyncRequired ( const ExecutorDeviceType  device_type) const

Definition at line 1030 of file QueryMemoryDescriptor.cpp.

References CHECK, executor_, and GPU.

Referenced by query_group_by_template_impl().

1031  {
1032  if (device_type != ExecutorDeviceType::GPU) {
1033  return false;
1034  } else {
1035  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
1036  CHECK(cuda_mgr);
1037  return cuda_mgr->isArchVoltaForAll();
1038  }
1039 }
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the caller graph for this function:

◆ lazyInitGroups()

bool QueryMemoryDescriptor::lazyInitGroups ( const ExecutorDeviceType  device_type) const

Definition at line 1003 of file QueryMemoryDescriptor.cpp.

References count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, and render_output_.

Referenced by QueryMemoryInitializer::allocateCountDistinctSet(), create_dev_group_by_buffers(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().

1003  {
1004  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
1005  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1006 }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ many_entries()

static bool QueryMemoryDescriptor::many_entries ( const int64_t  max_val,
const int64_t  min_val,
const int64_t  bucket 
)
inline static

Definition at line 139 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), and init().

141  {
142  return max_val - min_val > 10000 * std::max(bucket, int64_t(1));
143  }
+ Here is the caller graph for this function:

◆ mustUseBaselineSort()

bool QueryMemoryDescriptor::mustUseBaselineSort ( ) const
inline

Definition at line 257 of file QueryMemoryDescriptor.h.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

+ Here is the caller graph for this function:

◆ operator==()

bool QueryMemoryDescriptor::operator== ( const QueryMemoryDescriptor &  other) const

Definition at line 533 of file QueryMemoryDescriptor.cpp.

References bucket_, col_slot_context_, count_distinct_descriptors_, force_4byte_float_, group_col_compact_width_, group_col_widths_, has_nulls_, idx_target_as_key_, interleaved_bins_on_gpu_, keyless_hash_, max_val_, min_val_, output_columnar_, query_desc_type_, sharing_, sort_on_gpu_, and target_groupby_indices_.

533  {
534  // Note that this method does not check ptr reference members (e.g. executor_) or
535  // entry_count_
536  if (query_desc_type_ != other.query_desc_type_) {
537  return false;
538  }
539  if (keyless_hash_ != other.keyless_hash_) {
540  return false;
541  }
542  if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
543  return false;
544  }
545  if (idx_target_as_key_ != other.idx_target_as_key_) {
546  return false;
547  }
548  if (force_4byte_float_ != other.force_4byte_float_) {
549  return false;
550  }
551  if (group_col_widths_ != other.group_col_widths_) {
552  return false;
553  }
554  if (group_col_compact_width_ != other.group_col_compact_width_) {
555  return false;
556  }
557  if (target_groupby_indices_ != other.target_groupby_indices_) {
558  return false;
559  }
560  if (min_val_ != other.min_val_) {
561  return false;
562  }
563  if (max_val_ != other.max_val_) {
564  return false;
565  }
566  if (bucket_ != other.bucket_) {
567  return false;
568  }
569  if (has_nulls_ != other.has_nulls_) {
570  return false;
571  }
572  if (sharing_ != other.sharing_) {
573  return false;
574  }
575  if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
576  return false;
577  } else {
578  // Count distinct descriptors can legitimately differ in device only.
579  for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
580  auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
581  auto count_distinct_desc = count_distinct_descriptors_[i];
582  count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
583  if (ref_count_distinct_desc != count_distinct_desc) {
584  return false;
585  }
586  }
587  }
588  if (sort_on_gpu_ != other.sort_on_gpu_) {
589  return false;
590  }
591  if (output_columnar_ != other.output_columnar_) {
592  return false;
593  }
594  if (col_slot_context_ != other.col_slot_context_) {
595  return false;
596  }
597  return true;
598 }
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
std::vector< ssize_t > target_groupby_indices_

◆ pick_target_compact_width()

int8_t QueryMemoryDescriptor::pick_target_compact_width ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const int8_t  crt_min_byte_width 
)
static

Definition at line 632 of file QueryMemoryDescriptor.cpp.

References CHECK, CHECK_EQ, g_bigint_count, get_col_byte_widths(), Analyzer::UOper::get_operand(), Analyzer::Expr::get_type_info(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, anonymous_namespace{QueryMemoryDescriptor.cpp}::is_int_and_no_bigger_than(), kCOUNT, kENCODING_DICT, kUNNEST, and RelAlgExecutionUnit::target_exprs.

Referenced by init().

635  {
636  if (g_bigint_count) {
637  return sizeof(int64_t);
638  }
639  int8_t compact_width{0};
640  auto col_it = ra_exe_unit.input_col_descs.begin();
641  int unnest_array_col_id{std::numeric_limits<int>::min()};
642  for (const auto groupby_expr : ra_exe_unit.groupby_exprs) {
643  const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
644  if (uoper && uoper->get_optype() == kUNNEST) {
645  const auto& arg_ti = uoper->get_operand()->get_type_info();
646  CHECK(arg_ti.is_array());
647  const auto& elem_ti = arg_ti.get_elem_type();
648  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
649  unnest_array_col_id = (*col_it)->getColId();
650  } else {
651  compact_width = crt_min_byte_width;
652  break;
653  }
654  }
655  ++col_it;
656  }
657  if (!compact_width &&
658  (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
659  compact_width = crt_min_byte_width;
660  }
661  if (!compact_width) {
662  col_it = ra_exe_unit.input_col_descs.begin();
663  std::advance(col_it, ra_exe_unit.groupby_exprs.size());
664  for (const auto target : ra_exe_unit.target_exprs) {
665  const auto& ti = target->get_type_info();
666  const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
667  if (agg && agg->get_arg()) {
668  compact_width = crt_min_byte_width;
669  break;
670  }
671 
672  if (agg) {
673  CHECK_EQ(kCOUNT, agg->get_aggtype());
674  CHECK(!agg->get_is_distinct());
675  ++col_it;
676  continue;
677  }
678 
679  if (is_int_and_no_bigger_than(ti, 4) ||
680  (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
681  ++col_it;
682  continue;
683  }
684 
685  const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
686  if (uoper && uoper->get_optype() == kUNNEST &&
687  (*col_it)->getColId() == unnest_array_col_id) {
688  const auto arg_ti = uoper->get_operand()->get_type_info();
689  CHECK(arg_ti.is_array());
690  const auto& elem_ti = arg_ti.get_elem_type();
691  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
692  ++col_it;
693  continue;
694  }
695  }
696 
697  compact_width = crt_min_byte_width;
698  break;
699  }
700  }
701  if (!compact_width) {
702  size_t total_tuples{0};
703  for (const auto& qi : query_infos) {
704  total_tuples += qi.info.getNumTuples();
705  }
706  return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
707  unnest_array_col_id != std::numeric_limits<int>::min()
708  ? 4
709  : crt_min_byte_width;
710  } else {
711  // TODO(miyu): relax this condition to allow more cases just w/o padding
712  for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs, {})) {
713  compact_width = std::max(compact_width, wid);
714  }
715  return compact_width;
716  }
717 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:195
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list, const std::vector< ssize_t > &col_exprs_to_not_project)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_bigint_count
Definition: sqldefs.h:71
bool is_int_and_no_bigger_than(const SQLTypeInfo &ti, const size_t byte_width)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:77
#define CHECK(condition)
Definition: Logger.h:187
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs
const Expr * get_operand() const
Definition: Analyzer.h:364
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ resetGroupColWidths()

void QueryMemoryDescriptor::resetGroupColWidths ( const std::vector< int8_t > &  new_group_col_widths)
inline protected

Definition at line 311 of file QueryMemoryDescriptor.h.

Referenced by ResultSet::fixupQueryMemoryDescriptor().

311  {
312  group_col_widths_ = new_group_col_widths;
313  }
std::vector< int8_t > group_col_widths_
+ Here is the caller graph for this function:

◆ setEntryCount()

void QueryMemoryDescriptor::setEntryCount ( const size_t  val)
inline

Definition at line 232 of file QueryMemoryDescriptor.h.

Referenced by ResultSet::append(), perfect_hash_one_col_desc(), Executor::reduceMultiDeviceResultSets(), TEST(), ResultSetStorage::updateEntryCount(), and ResultSet::updateStorageEntryCount().

+ Here is the caller graph for this function:

◆ setForceFourByteFloat()

void QueryMemoryDescriptor::setForceFourByteFloat ( const bool  val)
inline

Definition at line 262 of file QueryMemoryDescriptor.h.

◆ setGroupColCompactWidth()

void QueryMemoryDescriptor::setGroupColCompactWidth ( const int8_t  val)
inline

Definition at line 195 of file QueryMemoryDescriptor.h.

Referenced by anonymous_namespace{ResultSetBaselineRadixSortTest.cpp}::baseline_sort_desc().

+ Here is the caller graph for this function:

◆ setHasInterleavedBinsOnGpu()

void QueryMemoryDescriptor::setHasInterleavedBinsOnGpu ( const bool  val)
inline

Definition at line 176 of file QueryMemoryDescriptor.h.

◆ setHasKeylessHash()

void QueryMemoryDescriptor::setHasKeylessHash ( const bool  val)
inline

Definition at line 173 of file QueryMemoryDescriptor.h.

Referenced by TEST().

+ Here is the caller graph for this function:

◆ setOutputColumnar()

void QueryMemoryDescriptor::setOutputColumnar ( const bool  val)

Definition at line 946 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, isLogicalSizedColumnsAllowed(), output_columnar_, and ColSlotContext::setAllSlotsPaddedSizeToLogicalSize().

Referenced by TEST().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ setQueryDescriptionType()

void QueryMemoryDescriptor::setQueryDescriptionType ( const QueryDescriptionType  val)
inline

Definition at line 166 of file QueryMemoryDescriptor.h.

Referenced by SpeculativeTopNMap::asRows().

166 { query_desc_type_ = val; }
QueryDescriptionType query_desc_type_
+ Here is the caller graph for this function:

◆ setTargetIdxForKey()

void QueryMemoryDescriptor::setTargetIdxForKey ( const int32_t  val)
inline

Definition at line 179 of file QueryMemoryDescriptor.h.

◆ sharedMemBytes()

size_t QueryMemoryDescriptor::sharedMemBytes ( const ExecutorDeviceType  device_type) const

Definition at line 1012 of file QueryMemoryDescriptor.cpp.

References CHECK, CHECK_EQ, CPU, entry_count_, executor_, getRowSize(), GPU, SharedForKeylessOneColumnKnownRange, and sharing_.

Referenced by blocksShareMemory(), QueryExecutionContext::launchGpuCode(), and query_group_by_template_impl().

1012  {
1013  CHECK(device_type == ExecutorDeviceType::CPU || device_type == ExecutorDeviceType::GPU);
1014  if (device_type == ExecutorDeviceType::CPU) {
1015  return 0;
1016  }
1017  // if performing keyless aggregate query with a single column group-by:
1018  if (sharing_ == GroupByMemSharing::SharedForKeylessOneColumnKnownRange) {
1019  CHECK_EQ(getRowSize(),
1020  sizeof(int64_t)); // Currently just designed for this scenario
1021  size_t shared_mem_size =
1022  (/*bin_count=*/entry_count_ + 1) * sizeof(int64_t); // one extra for NULL values
1023  CHECK(shared_mem_size <=
1024  executor_->getCatalog()->getDataMgr().getCudaMgr()->getMaxSharedMemoryForAll());
1025  return shared_mem_size;
1026  }
1027  return 0;
1028 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sortOnGpu()

bool QueryMemoryDescriptor::sortOnGpu ( ) const
inline

Definition at line 249 of file QueryMemoryDescriptor.h.

Referenced by alignPaddedSlots(), QueryExecutionContext::launchGpuCode(), Executor::ExecutionDispatch::runImpl(), ResultSet::sort(), and use_speculative_top_n().

+ Here is the caller graph for this function:

◆ targetGroupbyIndicesSize()

size_t QueryMemoryDescriptor::targetGroupbyIndicesSize ( ) const
inline

◆ threadsShareMemory()

bool QueryMemoryDescriptor::threadsShareMemory ( ) const

◆ toString()

std::string QueryMemoryDescriptor::toString ( ) const

Definition at line 1124 of file QueryMemoryDescriptor.cpp.

References allow_multifrag_, blocksShareMemory(), anonymous_namespace{QueryMemoryDescriptor.cpp}::boolToString(), bucket_, col_slot_context_, entry_count_, getTargetIdxForKey(), GPU, interleaved_bins_on_gpu_, keyless_hash_, lazyInitGroups(), max_val_, min_val_, must_use_baseline_sort_, output_columnar_, query_desc_type_, anonymous_namespace{QueryMemoryDescriptor.cpp}::queryDescTypeToString(), render_output_, sort_on_gpu_, threadsShareMemory(), to_string(), ColSlotContext::toString(), and usesGetGroupValueFast().

Referenced by Executor::dispatchFragments().

1124  {
1125  std::string str;
1126  str += "Query Memory Descriptor State\n";
1127  str += "\tQuery Type: " + queryDescTypeToString(query_desc_type_) + "\n";
1128  str += "\tAllow Multifrag: " + boolToString(allow_multifrag_) + "\n";
1129  str +=
1130  "\tKeyless Hash: " + boolToString(keyless_hash_) +
1131  (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
1132  : "") +
1133  "\n";
1134  str += "\tInterleaved Bins on GPU: " + boolToString(interleaved_bins_on_gpu_) + "\n";
1135  str += "\tBlocks Share Memory: " + boolToString(blocksShareMemory()) + "\n";
1136  str += "\tThreads Share Memory: " + boolToString(threadsShareMemory()) + "\n";
1137  str += "\tUses Fast Group Values: " + boolToString(usesGetGroupValueFast()) + "\n";
1138  str += "\tLazy Init Groups (GPU): " +
1139  boolToString(lazyInitGroups(ExecutorDeviceType::GPU)) + "\n";
1140  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
1141  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
1142  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
1143  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
1144  str += "\tSort on GPU: " + boolToString(sort_on_gpu_) + "\n";
1145  str += "\tOutput Columnar: " + boolToString(output_columnar_) + "\n";
1146  str += "\tRender Output: " + boolToString(render_output_) + "\n";
1147  str += "\tUse Baseline Sort: " + boolToString(must_use_baseline_sort_) + "\n";
1148  str += "\t" + col_slot_context_.toString();
1149  return str;
1150 }
std::string to_string(char const *&&v)
std::string queryDescTypeToString(const QueryDescriptionType val)
int32_t getTargetIdxForKey() const
std::string toString() const
QueryDescriptionType query_desc_type_
bool lazyInitGroups(const ExecutorDeviceType) const
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ toThrift()

static TResultSetBufferDescriptor QueryMemoryDescriptor::toThrift ( const QueryMemoryDescriptor &  )
static

◆ updateActualMinByteWidth()

int8_t QueryMemoryDescriptor::updateActualMinByteWidth ( const int8_t  actual_min_byte_width) const

Definition at line 1074 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getMinPaddedByteSize().

1075  {
1076  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
1077 }
int8_t getMinPaddedByteSize(const int8_t actual_min_byte_width) const
+ Here is the call graph for this function:

◆ useConsistentSlotWidthSize()

void QueryMemoryDescriptor::useConsistentSlotWidthSize ( const int8_t  slot_width_size)

Definition at line 1065 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setAllSlotsSize().

1065  {
1066  col_slot_context_.setAllSlotsSize(slot_width_size);
1067 }
void setAllSlotsSize(const int8_t slot_width_size)
+ Here is the call graph for this function:

◆ usesGetGroupValueFast()

bool QueryMemoryDescriptor::usesGetGroupValueFast ( ) const

Definition at line 975 of file QueryMemoryDescriptor.cpp.

References getGroupbyColCount(), GroupByPerfectHash, and query_desc_type_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), and toString().

+ Here is the call graph for this function:
+ Here is the caller graph for this function:

Friends And Related Function Documentation

◆ AggregateReductionEgress

template<typename META_CLASS_TYPE >
friend class AggregateReductionEgress
friend

Definition at line 355 of file QueryMemoryDescriptor.h.

◆ QueryExecutionContext

friend class QueryExecutionContext
friend

Definition at line 352 of file QueryMemoryDescriptor.h.

Referenced by getQueryExecutionContext().

◆ ResultSet

friend class ResultSet
friend

Definition at line 351 of file QueryMemoryDescriptor.h.

Member Data Documentation

◆ allow_multifrag_

bool QueryMemoryDescriptor::allow_multifrag_
private

Definition at line 317 of file QueryMemoryDescriptor.h.

Referenced by toString().

◆ bucket_

int64_t QueryMemoryDescriptor::bucket_
private

Definition at line 334 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), operator==(), and toString().

◆ col_slot_context_

◆ count_distinct_descriptors_

◆ entry_count_

size_t QueryMemoryDescriptor::entry_count_
private

◆ executor_

const Executor* QueryMemoryDescriptor::executor_
private

◆ force_4byte_float_

bool QueryMemoryDescriptor::force_4byte_float_
private

Definition at line 343 of file QueryMemoryDescriptor.h.

Referenced by operator==().

◆ group_col_compact_width_

int8_t QueryMemoryDescriptor::group_col_compact_width_
private

Definition at line 326 of file QueryMemoryDescriptor.h.

Referenced by operator==().

◆ group_col_widths_

std::vector<int8_t> QueryMemoryDescriptor::group_col_widths_
private

◆ has_nulls_

bool QueryMemoryDescriptor::has_nulls_
private

Definition at line 335 of file QueryMemoryDescriptor.h.

Referenced by operator==().

◆ idx_target_as_key_

int32_t QueryMemoryDescriptor::idx_target_as_key_
private

Definition at line 321 of file QueryMemoryDescriptor.h.

Referenced by operator==().

◆ interleaved_bins_on_gpu_

bool QueryMemoryDescriptor::interleaved_bins_on_gpu_
private

◆ keyless_hash_

bool QueryMemoryDescriptor::keyless_hash_
private

◆ max_val_

int64_t QueryMemoryDescriptor::max_val_
private

Definition at line 333 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), operator==(), and toString().

◆ min_val_

int64_t QueryMemoryDescriptor::min_val_
private

Definition at line 331 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), operator==(), and toString().

◆ must_use_baseline_sort_

bool QueryMemoryDescriptor::must_use_baseline_sort_
private

Definition at line 341 of file QueryMemoryDescriptor.h.

Referenced by toString().

◆ output_columnar_

◆ query_desc_type_

◆ render_output_

bool QueryMemoryDescriptor::render_output_
private

Definition at line 340 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), lazyInitGroups(), and toString().

◆ sharing_

GroupByMemSharing QueryMemoryDescriptor::sharing_
private

Definition at line 336 of file QueryMemoryDescriptor.h.

Referenced by operator==(), QueryMemoryDescriptor(), and sharedMemBytes().

◆ sort_on_gpu_

bool QueryMemoryDescriptor::sort_on_gpu_
private

Definition at line 338 of file QueryMemoryDescriptor.h.

Referenced by operator==(), QueryMemoryDescriptor(), and toString().

◆ target_groupby_indices_

std::vector<ssize_t> QueryMemoryDescriptor::target_groupby_indices_
private

Definition at line 329 of file QueryMemoryDescriptor.h.

Referenced by getBufferColSlotCount(), and operator==().


The documentation for this class was generated from the following files: