#include <QueryMemoryDescriptor.h>

Collaboration diagram for QueryMemoryDescriptor:

Public Member Functions
	QueryMemoryDescriptor ()

	QueryMemoryDescriptor (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector< int8_t > &group_col_widths, const int8_t group_col_compact_width, const std::vector< int64_t > &target_groupby_indices, const size_t entry_count, const ApproxQuantileDescriptors &, const CountDistinctDescriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort, const bool use_streaming_top_n, const bool threads_can_reuse_group_by_buffers)

	QueryMemoryDescriptor (const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type)

	QueryMemoryDescriptor (const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector< int8_t > &group_col_widths)

	QueryMemoryDescriptor (const TResultSetBufferDescriptor &thrift_query_memory_descriptor)

bool	operator== (const QueryMemoryDescriptor &other) const

std::unique_ptr < QueryExecutionContext >	getQueryExecutionContext (const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *) const

bool	countDistinctDescriptorsLogicallyEmpty () const

const Executor *	getExecutor () const

QueryDescriptionType	getQueryDescriptionType () const

void	setQueryDescriptionType (const QueryDescriptionType val)

bool	isSingleColumnGroupByWithPerfectHash () const

bool	hasKeylessHash () const

void	setHasKeylessHash (const bool val)

bool	hasInterleavedBinsOnGpu () const

void	setHasInterleavedBinsOnGpu (const bool val)

int32_t	getTargetIdxForKey () const

void	setTargetIdxForKey (const int32_t val)

int8_t	groupColWidth (const size_t key_idx) const

size_t	getPrependedGroupColOffInBytes (const size_t group_idx) const

size_t	getPrependedGroupBufferSizeInBytes () const

const auto	groupColWidthsBegin () const

const auto	groupColWidthsEnd () const

void	clearGroupColWidths ()

bool	isGroupBy () const

void	setGroupColCompactWidth (const int8_t val)

size_t	getColCount () const

size_t	getSlotCount () const

const int8_t	getPaddedSlotWidthBytes (const size_t slot_idx) const

const int8_t	getLogicalSlotWidthBytes (const size_t slot_idx) const

void	setPaddedSlotWidthBytes (const size_t slot_idx, const int8_t bytes)

const int8_t	getSlotIndexForSingleSlotCol (const size_t col_idx) const

size_t	getPaddedColWidthForRange (const size_t offset, const size_t range) const

void	useConsistentSlotWidthSize (const int8_t slot_width_size)

size_t	getRowWidth () const

int8_t	updateActualMinByteWidth (const int8_t actual_min_byte_width) const

void	addColSlotInfo (const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)

void	addColSlotInfoFlatBuffer (const int64_t flatbuffer_size)

int64_t	getFlatBufferSize (const size_t slot_idx) const

bool	checkSlotUsesFlatBufferFormat (const size_t slot_idx) const

int64_t	getPaddedSlotBufferSize (const size_t slot_idx) const

void	clearSlotInfo ()

void	alignPaddedSlots ()

int64_t	getTargetGroupbyIndex (const size_t target_idx) const

void	setAllTargetGroupbyIndices (std::vector< int64_t > group_by_indices)

size_t	targetGroupbyIndicesSize () const

size_t	targetGroupbyNegativeIndicesSize () const

void	clearTargetGroupbyIndices ()

size_t	getEntryCount () const

void	setEntryCount (const size_t val)

int64_t	getMinVal () const

int64_t	getMaxVal () const

int64_t	getBucket () const

bool	hasNulls () const

const ApproxQuantileDescriptors &	getApproxQuantileDescriptors () const

const CountDistinctDescriptor &	getCountDistinctDescriptor (const size_t idx) const

size_t	getCountDistinctDescriptorsSize () const

bool	sortOnGpu () const

bool	canOutputColumnar () const

bool	didOutputColumnar () const

void	setOutputColumnar (const bool val)

bool	useStreamingTopN () const

bool	isLogicalSizedColumnsAllowed () const

bool	mustUseBaselineSort () const

bool	threadsCanReuseGroupByBuffers () const

void	setThreadsCanReuseGroupByBuffers (const bool val)

bool	forceFourByteFloat () const

void	setForceFourByteFloat (const bool val)

size_t	getGroupbyColCount () const

size_t	getKeyCount () const

size_t	getBufferColSlotCount () const

size_t	getBufferSizeBytes (const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const

size_t	getBufferSizeBytes (const ExecutorDeviceType device_type) const

size_t	getBufferSizeBytes (const ExecutorDeviceType device_type, const size_t override_entry_count) const

const ColSlotContext &	getColSlotContext () const

bool	usesGetGroupValueFast () const

bool	blocksShareMemory () const

bool	threadsShareMemory () const

bool	lazyInitGroups (const ExecutorDeviceType) const

bool	interleavedBins (const ExecutorDeviceType) const

size_t	getColOffInBytes (const size_t col_idx) const

size_t	getColOffInBytesInNextBin (const size_t col_idx) const

size_t	getNextColOffInBytes (const int8_t *col_ptr, const size_t bin, const size_t col_idx) const

size_t	getNextColOffInBytesRowOnly (const int8_t *col_ptr, const size_t col_idx) const

size_t	getColOnlyOffInBytes (const size_t col_idx) const

size_t	getRowSize () const

size_t	getColsSize () const

size_t	getWarpCount () const

size_t	getCompactByteWidth () const

size_t	getEffectiveKeyWidth () const

bool	isWarpSyncRequired (const ExecutorDeviceType) const

std::string	queryDescTypeToString () const

std::string	toString () const

std::string	reductionKey () const

bool	hasVarlenOutput () const

std::optional< size_t >	varlenOutputBufferElemSize () const

size_t	varlenOutputRowSizeToSlot (const size_t slot_idx) const

bool	slotIsVarlenOutput (const size_t slot_idx) const

size_t	getAvailableCpuThreads () const

void	setAvailableCpuThreads (size_t num_available_threads) const

std::optional< size_t >	getMaxPerDeviceCardinality (const RelAlgExecutionUnit &ra_exe_unit) const

bool	canUsePerDeviceCardinality (const RelAlgExecutionUnit &ra_exe_unit) const

Static Public Member Functions
static TResultSetBufferDescriptor	toThrift (const QueryMemoryDescriptor &)

static std::unique_ptr < QueryMemoryDescriptor >	init (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const ApproxQuantileDescriptors &, const CountDistinctDescriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint, const bool threads_can_reuse_group_by_buffers)

static bool	many_entries (const int64_t max_val, const int64_t min_val, const int64_t bucket)

static bool	countDescriptorsLogicallyEmpty (const CountDistinctDescriptors &count_distinct_descriptors)

static int8_t	pick_target_compact_width (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)

Protected Member Functions
void	resetGroupColWidths (const std::vector< int8_t > &new_group_col_widths)

Private Member Functions
size_t	getTotalBytesOfColumnarBuffers () const

size_t	getTotalBytesOfColumnarBuffers (const size_t num_entries_per_column) const

size_t	getTotalBytesOfColumnarProjections (const size_t projection_count) const

Private Attributes
const Executor *	executor_

bool	allow_multifrag_

QueryDescriptionType	query_desc_type_

bool	keyless_hash_

bool	interleaved_bins_on_gpu_

int32_t	idx_target_as_key_

std::vector< int8_t >	group_col_widths_

int8_t	group_col_compact_width_

std::vector< int64_t >	target_groupby_indices_

size_t	entry_count_

int64_t	min_val_

int64_t	max_val_

int64_t	bucket_

bool	has_nulls_

ApproxQuantileDescriptors	approx_quantile_descriptors_

CountDistinctDescriptors	count_distinct_descriptors_

bool	sort_on_gpu_

bool	output_columnar_

bool	render_output_

bool	must_use_baseline_sort_

bool	use_streaming_top_n_

bool	threads_can_reuse_group_by_buffers_

bool	force_4byte_float_

ColSlotContext	col_slot_context_

size_t	num_available_threads_ {1}

Friends
class	ResultSet

class	QueryExecutionContext

Detailed Description

Definition at line 68 of file QueryMemoryDescriptor.h.

Constructor & Destructor Documentation

QueryMemoryDescriptor::QueryMemoryDescriptor ( )

Definition at line 554 of file QueryMemoryDescriptor.cpp.

     // Default state: no executor, Projection query type, every numeric field zero
     // and every flag false. Members not listed here (group_col_widths_,
     // target_groupby_indices_, the quantile / count-distinct descriptors and
     // col_slot_context_) are default-constructed, and num_available_threads_
     // keeps its in-class default of 1.
     : executor_(nullptr)
     , allow_multifrag_(false)
     , query_desc_type_(QueryDescriptionType::Projection)
     , keyless_hash_(false)
     , interleaved_bins_on_gpu_(false)
     , idx_target_as_key_(0)
     , group_col_compact_width_(0)
     , entry_count_(0)
     , min_val_(0)
     , max_val_(0)
     , bucket_(0)
     , has_nulls_(false)
     , sort_on_gpu_(false)
     , output_columnar_(false)
     , render_output_(false)
     , must_use_baseline_sort_(false)
     , use_streaming_top_n_(false)
     , threads_can_reuse_group_by_buffers_(false)
     , force_4byte_float_(false) {}

QueryMemoryDescriptor::QueryMemoryDescriptor	(	const Executor *	executor,
		const RelAlgExecutionUnit &	ra_exe_unit,
		const std::vector< InputTableInfo > &	query_infos,
		const bool	allow_multifrag,
		const bool	keyless_hash,
		const bool	interleaved_bins_on_gpu,
		const int32_t	idx_target_as_key,
		const ColRangeInfo &	col_range_info,
		const ColSlotContext &	col_slot_context,
		const std::vector< int8_t > &	group_col_widths,
		const int8_t	group_col_compact_width,
		const std::vector< int64_t > &	target_groupby_indices,
		const size_t	entry_count,
		const ApproxQuantileDescriptors &	approx_quantile_descriptors,
		const CountDistinctDescriptors	count_distinct_descriptors,
		const bool	sort_on_gpu_hint,
		const bool	output_columnar,
		const bool	render_output,
		const bool	must_use_baseline_sort,
		const bool	use_streaming_top_n,
		const bool	threads_can_reuse_group_by_buffers
	)

Definition at line 453 of file QueryMemoryDescriptor.cpp.

References canOutputColumnar(), CHECK, col_slot_context_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, streaming_top_n::get_heap_size(), getEntryCount(), getRowSize(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, isLogicalSizedColumnsAllowed(), keyless_hash_, heavyai::NonGroupedAggregate, output_columnar_, heavyai::Projection, query_desc_type_, ColSlotContext::setAllSlotsPaddedSizeToLogicalSize(), ColSlotContext::setAllUnsetSlotsPaddedSize(), sort_on_gpu_, heavyai::TableFunction, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::use_bump_allocator, use_streaming_top_n_, and ColSlotContext::validate().

     // Copies the caller-supplied layout settings into the descriptor. The query
     // description type and the min / max / bucket / has_nulls range fields all come
     // from col_range_info. sort_on_gpu_ has no initializer here; it is assigned in
     // the body before it is first read.
     : executor_(executor)
     , allow_multifrag_(allow_multifrag)
     , query_desc_type_(col_range_info.hash_type_)
     , keyless_hash_(keyless_hash)
     , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
     , idx_target_as_key_(idx_target_as_key)
     , group_col_widths_(group_col_widths)
     , group_col_compact_width_(group_col_compact_width)
     , target_groupby_indices_(target_groupby_indices)
     , entry_count_(entry_count)
     , min_val_(col_range_info.min)
     , max_val_(col_range_info.max)
     , bucket_(col_range_info.bucket)
     , has_nulls_(col_range_info.has_nulls)
     , approx_quantile_descriptors_(approx_quantile_descriptors)
     , count_distinct_descriptors_(count_distinct_descriptors)
     // Placeholder only: the effective value is chosen in the body below.
     , output_columnar_(false)
     , render_output_(render_output)
     , must_use_baseline_sort_(must_use_baseline_sort)
     , use_streaming_top_n_(use_streaming_top_n)
     , threads_can_reuse_group_by_buffers_(threads_can_reuse_group_by_buffers)
     , force_4byte_float_(false)
     , col_slot_context_(col_slot_context)
     // Replaces the in-class default with the value reported by cpu_threads().
     , num_available_threads_(cpu_threads()) {
   // This constructor must not be used for table-function descriptors.
   CHECK(!(query_desc_type_ == QueryDescriptionType::TableFunction));
   // Operates on the member copy of the slot context, not on the caller's object.
   // Presumably 8 is a padded width in bytes for slots that have none yet - TODO confirm.
   col_slot_context_.setAllUnsetSlotsPaddedSize(8);
   col_slot_context_.validate();
 
   // The GPU-sort hint is honoured only when canOutputColumnar() holds and the
   // layout is not keyless.
   sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
   if (sort_on_gpu_) {
     // Sorting on GPU always forces columnar output and excludes the bump allocator.
     CHECK(!ra_exe_unit.use_bump_allocator);
     output_columnar_ = true;
   } else {
     // NOTE(review): the body reads `output_columnar_hint`, while the documented
     // signature for this constructor names the parameter `output_columnar` -
     // confirm the declaration and the definition agree.
     switch (query_desc_type_) {
       case QueryDescriptionType::Projection:
         output_columnar_ = output_columnar_hint;
         break;
       case QueryDescriptionType::GroupByPerfectHash:
         // Besides the hint, requires logically empty count-distinct descriptors
         // and no APPROX_QUANTILE / MODE targets.
         output_columnar_ = output_columnar_hint &&
                            QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
                                count_distinct_descriptors_) &&
                            !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);
         break;
       case QueryDescriptionType::GroupByBaselineHash:
         output_columnar_ = output_columnar_hint;
         break;
       case QueryDescriptionType::NonGroupedAggregate:
         // Same extra conditions as the perfect-hash case above.
         output_columnar_ = output_columnar_hint &&
                            QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
                                count_distinct_descriptors_) &&
                            !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);
         break;
       default:
         // Any other description type is never columnar here.
         output_columnar_ = false;
         break;
     }
   }
 
   if (isLogicalSizedColumnsAllowed()) {
     // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
     // sizes
     CHECK(!ra_exe_unit.use_bump_allocator);
     // Slot widths change here, so the slot context is validated a second time.
     col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
     col_slot_context_.validate();
   }
 
 #ifdef HAVE_CUDA
   // Check Streaming Top N heap usage, bail if > max slab size, CUDA ONLY
   // NOTE(review): `executor` (parameter) and `executor_` (member) hold the same
   // pointer at this point; both are dereferenced without a null check.
   if (use_streaming_top_n_ && executor->getDataMgr()->gpusPresent()) {
     const auto thread_count = executor->blockSize() * executor->gridSize();
     const auto total_buff_size =
         streaming_top_n::get_heap_size(getRowSize(), getEntryCount(), thread_count);
     if (total_buff_size > executor_->maxGpuSlabSize()) {
       throw StreamingTopNOOM(total_buff_size);
     }
   }
 #endif
 }

Here is the call graph for this function:

QueryMemoryDescriptor::QueryMemoryDescriptor	(	const Executor *	executor,
		const size_t	entry_count,
		const QueryDescriptionType	query_desc_type
	)

Definition at line 575 of file QueryMemoryDescriptor.cpp.

References output_columnar_, and heavyai::TableFunction.

     : executor_(executor)
     , allow_multifrag_(false)
     , query_desc_type_(query_desc_type)
     , keyless_hash_(false)
     , interleaved_bins_on_gpu_(false)
     , idx_target_as_key_(0)
     , group_col_compact_width_(0)
     , entry_count_(entry_count)
     , min_val_(0)
     , max_val_(0)
     , bucket_(0)
     , has_nulls_(false)
     , sort_on_gpu_(false)
     , output_columnar_(false)
     , render_output_(false)
     , must_use_baseline_sort_(false)
     , use_streaming_top_n_(false)
     , threads_can_reuse_group_by_buffers_(false)
     , force_4byte_float_(false)
     , num_available_threads_(cpu_threads()) {
   if (query_desc_type == QueryDescriptionType::TableFunction) {
     // Table functions output columns are always columnar
     output_columnar_ = true;
   }
 }

QueryMemoryDescriptor::QueryMemoryDescriptor	(	const QueryDescriptionType	query_desc_type,
		const int64_t	min_val,
		const int64_t	max_val,
		const bool	has_nulls,
		const std::vector< int8_t > &	group_col_widths
	)

Definition at line 604 of file QueryMemoryDescriptor.cpp.

     : executor_(nullptr)
     , allow_multifrag_(false)
     , query_desc_type_(query_desc_type)
     , keyless_hash_(false)
     , interleaved_bins_on_gpu_(false)
     , idx_target_as_key_(0)
     , group_col_widths_(group_col_widths)
     , group_col_compact_width_(0)
     , entry_count_(0)
     , min_val_(min_val)
     , max_val_(max_val)
     , bucket_(0)
     , has_nulls_(false)
     , sort_on_gpu_(false)
     , output_columnar_(false)
     , render_output_(false)
     , must_use_baseline_sort_(false)
     , use_streaming_top_n_(false)
     , threads_can_reuse_group_by_buffers_(false)
     , force_4byte_float_(false)
     , num_available_threads_(cpu_threads()) {}

QueryMemoryDescriptor::QueryMemoryDescriptor ( const TResultSetBufferDescriptor & thrift_query_memory_descriptor )

Member Function Documentation

void QueryMemoryDescriptor::addColSlotInfo ( const std::vector< std::tuple< int8_t, int8_t >> & slots_for_col )

Definition at line 1224 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumn(), and col_slot_context_.

Referenced by TableFunctionManager::allocate_output_buffers(), ResultSetLogicalValuesBuilder::create(), and TableFunctionExecutionContext::launchGpuCode().

                                                                   {
   // Forwards the caller's slot tuples unchanged to ColSlotContext::addColumn on
   // this descriptor's slot context.
   col_slot_context_.addColumn(slots_for_col);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

void QueryMemoryDescriptor::addColSlotInfoFlatBuffer ( const int64_t flatbuffer_size )

Definition at line 1229 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumnFlatBuffer(), and col_slot_context_.

Referenced by TableFunctionManager::allocate_output_buffers().

                                                                                     {
   // Forwards the given size to ColSlotContext::addColumnFlatBuffer on this
   // descriptor's slot context.
   col_slot_context_.addColumnFlatBuffer(flatbuffer_size);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

void QueryMemoryDescriptor::alignPaddedSlots ( )

Definition at line 1237 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::alignPaddedSlots(), col_slot_context_, and sortOnGpu().

                                                {
   // Delegates to the slot context, passing the current sortOnGpu() result.
   col_slot_context_.alignPaddedSlots(sortOnGpu());
 }

Here is the call graph for this function:

bool QueryMemoryDescriptor::blocksShareMemory ( ) const

Definition at line 1144 of file QueryMemoryDescriptor.cpp.

References bucket_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, g_cluster, getGroupbyColCount(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, many_entries(), max_val_, min_val_, heavyai::Projection, query_desc_type_, render_output_, and heavyai::TableFunction.

Referenced by canOutputColumnar(), ResultSetReductionJIT::codegen(), QueryMemoryInitializer::computeNumberOfBuffers(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), and toString().

                                                     {
   if (g_cluster) {
     return true;
   }
   if (!countDescriptorsLogicallyEmpty(count_distinct_descriptors_)) {
     return true;
   }
   if (executor_->isCPUOnly() || render_output_ ||
       query_desc_type_ == QueryDescriptionType::GroupByBaselineHash ||
       query_desc_type_ == QueryDescriptionType::Projection ||
       query_desc_type_ == QueryDescriptionType::TableFunction ||
       (query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
        getGroupbyColCount() > 1)) {
     return true;
   }
   return query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
          many_entries(max_val_, min_val_, bucket_);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

bool QueryMemoryDescriptor::canOutputColumnar ( ) const

Definition at line 1241 of file QueryMemoryDescriptor.cpp.

References blocksShareMemory(), count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, interleavedBins(), threadsShareMemory(), and usesGetGroupValueFast().

Referenced by QueryMemoryDescriptor().

                                                     {
   return usesGetGroupValueFast() && threadsShareMemory() && blocksShareMemory() &&
          !interleavedBins(ExecutorDeviceType::GPU) &&
          countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

bool QueryMemoryDescriptor::canUsePerDeviceCardinality ( const RelAlgExecutionUnit & ra_exe_unit ) const

Definition at line 1383 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), RelAlgExecutionUnit::join_quals, LEFT, heavyai::Projection, query_desc_type_, and RelAlgExecutionUnit::target_exprs_union.

                                                   {
   // union-query needs to consider the "SUM" of each subquery's result
   if (query_desc_type_ != QueryDescriptionType::Projection ||
       !ra_exe_unit.target_exprs_union.empty()) {
     return false;
   }
   auto is_left_join = [](auto& join_qual) { return join_qual.type == JoinType::LEFT; };
   auto& join_quals = ra_exe_unit.join_quals;
   return !std::any_of(join_quals.begin(), join_quals.end(), is_left_join);
 }

Here is the call graph for this function:

bool QueryMemoryDescriptor::checkSlotUsesFlatBufferFormat ( const size_t slot_idx ) const

inline

Definition at line 234 of file QueryMemoryDescriptor.h.

References ColSlotContext::checkSlotUsesFlatBufferFormat(), and col_slot_context_.

Referenced by ResultSet::checkSlotUsesFlatBufferFormat(), getPaddedSlotBufferSize(), and target_exprs_to_infos().

                                                                   {
     // Answered entirely by the slot context for the given slot index.
     return col_slot_context_.checkSlotUsesFlatBufferFormat(slot_idx);
   }

Here is the call graph for this function:

Here is the caller graph for this function:

void QueryMemoryDescriptor::clearGroupColWidths ( )

inline

Definition at line 198 of file QueryMemoryDescriptor.h.

References group_col_widths_.

198 { group_col_widths_.clear(); }

QueryMemoryDescriptor::group_col_widths_

std::vector< int8_t > group_col_widths_

Definition: QueryMemoryDescriptor.h:398

void QueryMemoryDescriptor::clearSlotInfo ( )

Definition at line 1233 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::clear(), and col_slot_context_.

                                             {
   // Calls clear() on the slot context; no other member is touched here.
   col_slot_context_.clear();
 }

Here is the call graph for this function:

void QueryMemoryDescriptor::clearTargetGroupbyIndices ( )

inline

Definition at line 259 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

259 { target_groupby_indices_.clear(); }

QueryMemoryDescriptor::target_groupby_indices_

std::vector< int64_t > target_groupby_indices_

Definition: QueryMemoryDescriptor.h:402

static bool QueryMemoryDescriptor::countDescriptorsLogicallyEmpty ( const CountDistinctDescriptors & count_distinct_descriptors )

inlinestatic

Definition at line 153 of file QueryMemoryDescriptor.h.

References Invalid.

Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), lazyInitGroups(), and QueryMemoryDescriptor().

                                                                   {
     return std::all_of(count_distinct_descriptors.begin(),
                        count_distinct_descriptors.end(),
                        [](const CountDistinctDescriptor& desc) {
                          return desc.impl_type_ == CountDistinctImplType::Invalid;
                        });
   }

Here is the caller graph for this function:

bool QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty ( ) const

inline

Definition at line 162 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_, and countDescriptorsLogicallyEmpty().

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), and anonymous_namespace{QueryMemoryInitializer.cpp}::collect_target_expr_metadata().

                                                       {
     // Applies the static countDescriptorsLogicallyEmpty() check to this
     // descriptor's own count-distinct descriptors.
     return countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
   }

Here is the call graph for this function:

Here is the caller graph for this function:

bool QueryMemoryDescriptor::didOutputColumnar ( ) const

inline

Definition at line 285 of file QueryMemoryDescriptor.h.

References output_columnar_.

285 { return output_columnar_; }

QueryMemoryDescriptor::output_columnar_

bool output_columnar_

Definition: QueryMemoryDescriptor.h:412

Here is the caller graph for this function:

bool QueryMemoryDescriptor::forceFourByteFloat ( ) const

inline

Definition at line 304 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

Referenced by ResultSet::makeTargetValue().

304 { return force_4byte_float_; }

QueryMemoryDescriptor::force_4byte_float_

bool force_4byte_float_

Definition: QueryMemoryDescriptor.h:417

Here is the caller graph for this function:

const ApproxQuantileDescriptors& QueryMemoryDescriptor::getApproxQuantileDescriptors ( ) const

inline

Definition at line 270 of file QueryMemoryDescriptor.h.

References approx_quantile_descriptors_.

Referenced by QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::initColumnsPerRow(), and QueryMemoryInitializer::QueryMemoryInitializer().

                                                                         {
     // Read-only access to the stored member; the reference is to the member itself.
     return approx_quantile_descriptors_;
   }

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getAvailableCpuThreads ( ) const

inline

Definition at line 372 of file QueryMemoryDescriptor.h.

References num_available_threads_.

Referenced by QueryMemoryInitializer::initRowGroups().

372 { return num_available_threads_; }

QueryMemoryDescriptor::num_available_threads_

size_t num_available_threads_

Definition: QueryMemoryDescriptor.h:425

Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getBucket ( ) const

inline

Definition at line 266 of file QueryMemoryDescriptor.h.

References bucket_.

Referenced by GroupByAndAggregate::codegenGroupBy(), and GroupByAndAggregate::codegenSingleColumnPerfectHash().

266 { return bucket_; }

QueryMemoryDescriptor::bucket_

int64_t bucket_

Definition: QueryMemoryDescriptor.h:407

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferColSlotCount ( ) const

Definition at line 1124 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and target_groupby_indices_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::advance_col_buff_to_slot(), QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().

                                                           {
   size_t total_slot_count = col_slot_context_.getSlotCount();
 
   if (target_groupby_indices_.empty()) {
     return total_slot_count;
   }
   return total_slot_count - std::count_if(target_groupby_indices_.begin(),
                                           target_groupby_indices_.end(),
                                           [](const int64_t i) { return i >= 0; });
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes	(	const RelAlgExecutionUnit &	ra_exe_unit,
		const unsigned	thread_count,
		const ExecutorDeviceType	device_type
	)		const

Definition at line 1047 of file QueryMemoryDescriptor.cpp.

References entry_count_, streaming_top_n::get_heap_size(), getRowSize(), SortInfo::limit, anonymous_namespace{Utm.h}::n, SortInfo::offset, RelAlgExecutionUnit::sort_info, and use_streaming_top_n_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), Executor::launchKernelsViaResourceMgr(), and QueryMemoryInitializer::QueryMemoryInitializer().

                                                 {
   if (use_streaming_top_n_) {
     const size_t n =
         ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit.value_or(0);
     return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
   }
   return getBufferSizeBytes(device_type, entry_count_);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType device_type ) const

Definition at line 1100 of file QueryMemoryDescriptor.cpp.

References entry_count_, and getBufferSizeBytes().

                                                 {
   // Convenience overload: uses the descriptor's own entry_count_ as the entry count.
   return getBufferSizeBytes(device_type, entry_count_);
 }

Here is the call graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes	(	const ExecutorDeviceType	device_type,
		const size_t	entry_count
	)		const

Returns total amount of output buffer memory for each device (CPU/GPU)

Columnar: for a projection, it returns the index buffer plus the columnar buffer (all non-lazy columns); for a table function, only the columnar buffer; for a group by, it returns the amount required for each group column (assumes 64-bit per group) plus the columnar buffer (all involved agg columns).

Row-wise: returns required memory per row multiplied by number of entries

Definition at line 1071 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_GE, executor_, getColsSize(), getRowSize(), getTotalBytesOfColumnarBuffers(), group_col_widths_, interleavedBins(), keyless_hash_, output_columnar_, heavyai::Projection, query_desc_type_, and heavyai::TableFunction.

                                                                                  {
   if (keyless_hash_ && !output_columnar_) {
     CHECK_GE(group_col_widths_.size(), size_t(1));
     auto row_bytes = align_to_int64(getColsSize());
     return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
            row_bytes;
   }
   constexpr size_t row_index_width = sizeof(int64_t);
   size_t total_bytes{0};
   if (output_columnar_) {
     switch (query_desc_type_) {
       case QueryDescriptionType::Projection:
         total_bytes = row_index_width * entry_count + getTotalBytesOfColumnarBuffers();
         break;
       case QueryDescriptionType::TableFunction:
         total_bytes = getTotalBytesOfColumnarBuffers();
         break;
       default:
         total_bytes = sizeof(int64_t) * group_col_widths_.size() * entry_count +
                       getTotalBytesOfColumnarBuffers();
         break;
     }
   } else {
     total_bytes = getRowSize() * entry_count;
   }
   return total_bytes;
 }

Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColCount ( ) const

Definition at line 1181 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColCount().

                                                   {
   // The column count is whatever the slot context reports.
   return col_slot_context_.getColCount();
 }

Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytes ( const size_t col_idx ) const

Definition at line 905 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, CHECK_GT, entry_count_, getColOnlyOffInBytes(), getEffectiveKeyWidth(), getFlatBufferSize(), getPaddedSlotWidthBytes(), getPrependedGroupBufferSizeInBytes(), getWarpCount(), group_col_widths_, heavyai::GroupByPerfectHash, keyless_hash_, output_columnar_, query_desc_type_, and heavyai::TableFunction.

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenOutputSlot(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), get_cols_ptr(), QueryExecutionContext::groupBufferToDeinterleavedResults(), QueryMemoryInitializer::initRowGroups(), inplace_sort_gpu(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

                                                                          {
   const auto warp_count = getWarpCount();
   if (output_columnar_) {
     CHECK_EQ(size_t(1), warp_count);
     size_t offset{0};
     if (!keyless_hash_) {
       offset += getPrependedGroupBufferSizeInBytes();
     }
     if (query_desc_type_ == QueryDescriptionType::TableFunction) {
       for (size_t index = 0; index < col_idx; ++index) {
         int8_t column_width = getPaddedSlotWidthBytes(index);
         if (column_width > 0) {
           offset += align_to_int64(column_width * entry_count_);
         } else {
           int64_t flatbuffer_size = getFlatBufferSize(index);
           CHECK_GT(flatbuffer_size, 0);
           offset += align_to_int64(flatbuffer_size);
         }
       }
     } else {
       for (size_t index = 0; index < col_idx; ++index) {
         offset += align_to_int64(getPaddedSlotWidthBytes(index) * entry_count_);
       }
     }
     return offset;
   }
 
   size_t offset{0};
   if (keyless_hash_) {
     // ignore, there's no group column in the output buffer
     CHECK(query_desc_type_ == QueryDescriptionType::GroupByPerfectHash);
   } else {
     offset += group_col_widths_.size() * getEffectiveKeyWidth();
     offset = align_to_int64(offset);
   }
   offset += getColOnlyOffInBytes(col_idx);
   return offset;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytesInNextBin ( const size_t col_idx ) const

Definition at line 985 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, getPaddedSlotWidthBytes(), getRowSize(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryExecutionContext::groupBufferToDeinterleavedResults().

                                                                                   {
   auto warp_count = getWarpCount();
   if (output_columnar_) {
     CHECK_EQ(size_t(1), group_col_widths_.size());
     CHECK_EQ(size_t(1), warp_count);
     return getPaddedSlotWidthBytes(col_idx);
   }
 
   return warp_count * getRowSize();
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOnlyOffInBytes ( const size_t col_idx ) const

Definition at line 892 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColOnlyOffInBytes().

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), getColOffInBytes(), and ResultSetStorage::reduceSingleRow().

                                                                                {
   // Offset as computed by the slot context alone; nothing is added here.
   return col_slot_context_.getColOnlyOffInBytes(col_idx);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

const ColSlotContext& QueryMemoryDescriptor::getColSlotContext ( ) const

inline

Definition at line 319 of file QueryMemoryDescriptor.h.

References col_slot_context_.

Referenced by QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().

319 { return col_slot_context_; }

QueryMemoryDescriptor::col_slot_context_

ColSlotContext col_slot_context_

Definition: QueryMemoryDescriptor.h:419

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColsSize ( ) const

Definition at line 831 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsAlignedPaddedSize().

Referenced by QueryExecutionContext::copyInitAggValsToDevice(), getBufferSizeBytes(), getRowSize(), QueryExecutionContext::launchCpuCode(), and QueryExecutionContext::sizeofInitAggVals().

                                                   {
   // Thin forwarder: returns the aligned, padded size of all slots as reported by
   // the slot context.
   return col_slot_context_.getAllSlotsAlignedPaddedSize();
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getCompactByteWidth ( ) const

Definition at line 853 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getCompactByteWidth().

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), and init_agg_val_vec().

                                                           {
   // Thin forwarder: the compact byte width is owned by the slot context.
   return col_slot_context_.getCompactByteWidth();
 }

Here is the call graph for this function:

Here is the caller graph for this function:

const CountDistinctDescriptor& QueryMemoryDescriptor::getCountDistinctDescriptor ( const size_t idx ) const

inline

Definition at line 274 of file QueryMemoryDescriptor.h.

References CHECK_LT, and count_distinct_descriptors_.

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateCountDistinctGpuMem(), QueryMemoryInitializer::calculateCountDistinctBufferSize(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), GroupByAndAggregate::codegenCountDistinct(), count_distinct_set_union_jit_rt(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::makeTargetValue(), ResultSetStorage::reduceOneCountDistinctSlot(), and ResultSetStorage::reduceSingleRow().

                                                                                     {
     // Bounds-checked accessor: idx must be below the number of stored descriptors.
     // The result is a reference into count_distinct_descriptors_, not a copy.
     CHECK_LT(idx, count_distinct_descriptors_.size());
     return count_distinct_descriptors_[idx];
   }

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getCountDistinctDescriptorsSize ( ) const

inline

Definition at line 278 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_.

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::reduceOneApproxQuantileSlot(), ResultSetStorage::reduceOneApproxQuantileSlot(), ResultSetReductionJIT::reduceOneCountDistinctSlot(), ResultSetStorage::reduceOneCountDistinctSlot(), and ResultSetReductionJIT::reduceOneModeSlot().

                                                  {
     // Number of entries in count_distinct_descriptors_; this is the exclusive upper
     // bound for the index accepted by getCountDistinctDescriptor().
     return count_distinct_descriptors_.size();
   }

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getEffectiveKeyWidth ( ) const

inline

Definition at line 347 of file QueryMemoryDescriptor.h.

References group_col_compact_width_.

Referenced by ResultSetStorage::binSearchRowCount(), GroupByAndAggregate::codegenEstimator(), GroupByAndAggregate::codegenGroupBy(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), ResultSetStorage::fillOneEntryRowWise(), get_key_bytes_rowwise(), getColOffInBytes(), getRowSize(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetStorage::initializeRowWise(), QueryMemoryInitializer::initRowGroups(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), and reductionKey().

                                              {
     // A zero group_col_compact_width_ is treated as "not set": in that case the key
     // width falls back to sizeof(int64_t); any non-zero value is returned as is.
     return group_col_compact_width_ ? group_col_compact_width_ : sizeof(int64_t);
   }

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getEntryCount ( ) const

inline

Definition at line 261 of file QueryMemoryDescriptor.h.

References entry_count_.

261 { return entry_count_; }

QueryMemoryDescriptor::entry_count_

size_t entry_count_

Definition: QueryMemoryDescriptor.h:403

Here is the caller graph for this function:

const Executor* QueryMemoryDescriptor::getExecutor ( ) const

inline

Definition at line 171 of file QueryMemoryDescriptor.h.

References executor_.

Referenced by anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), ResultSetReductionJIT::codegen(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::getExecutor(), ResultSet::getVarlenOrderEntry(), ResultSet::makeGeoTargetValue(), and ResultSet::makeVarlenTargetValue().

171 { return executor_; }

QueryMemoryDescriptor::executor_

const Executor * executor_

Definition: QueryMemoryDescriptor.h:389

Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getFlatBufferSize ( const size_t slot_idx ) const

inline

Definition at line 231 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::getFlatBufferSize().

Referenced by getColOffInBytes(), and getPaddedSlotBufferSize().

                                                          {
     // Thin forwarder: the flat-buffer size for slot_idx comes from the slot context
     // and is returned unchanged (no alignment is applied here).
     return col_slot_context_.getFlatBufferSize(slot_idx);
   }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getGroupbyColCount ( ) const

inline

Definition at line 308 of file QueryMemoryDescriptor.h.

References group_col_widths_.

308 { return group_col_widths_.size(); }

QueryMemoryDescriptor::group_col_widths_

std::vector< int8_t > group_col_widths_

Definition: QueryMemoryDescriptor.h:398

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getKeyCount ( ) const

inline

Definition at line 309 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), and keyless_hash_.

Referenced by anonymous_namespace{Execute.cpp}::permute_storage_columnar().

309 { return keyless_hash_ ? 0 : getGroupbyColCount(); }

QueryMemoryDescriptor::getGroupbyColCount

size_t getGroupbyColCount() const

Definition: QueryMemoryDescriptor.h:308

QueryMemoryDescriptor::keyless_hash_

bool keyless_hash_

Definition: QueryMemoryDescriptor.h:392

Here is the call graph for this function:

Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes ( const size_t slot_idx ) const

Definition at line 1198 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::logical_size.

Referenced by QueryMemoryInitializer::allocateTDigestsBuffer(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), TargetExprCodegen::codegenAggregate(), ResultSet::getTargetValueFromBufferRowwise(), and QueryMemoryInitializer::initializeQuantileParams().

                                  {
   // Reads the logical_size field of the slot info for slot_idx; the padded
   // counterpart is exposed by getPaddedSlotWidthBytes().
   return col_slot_context_.getSlotInfo(slot_idx).logical_size;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

std::optional< size_t > QueryMemoryDescriptor::getMaxPerDeviceCardinality ( const RelAlgExecutionUnit & ra_exe_unit ) const

Definition at line 1372 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{Utm.h}::a, and RelAlgExecutionUnit::per_device_cardinality.

                                                   {
   // Returns the largest .second value found in ra_exe_unit.per_device_cardinality,
   // or std::nullopt when the container is empty or that largest value is not
   // greater than zero.
   auto& pdc = ra_exe_unit.per_device_cardinality;
   // Elements are ordered by their .second member only; .first is ignored.
   auto by_cardinality = [](auto& a, auto& b) { return a.second < b.second; };
   auto itr = std::max_element(pdc.begin(), pdc.end(), by_cardinality);
   // max_element returns end() for an empty range, so check before dereferencing.
   if (itr != pdc.end() && itr->second > 0) {
     return itr->second;
   }
   return std::nullopt;
 }

int64_t QueryMemoryDescriptor::getMaxVal ( ) const

inline

Definition at line 265 of file QueryMemoryDescriptor.h.

References max_val_.

Referenced by GroupByAndAggregate::codegenGroupBy().

265 { return max_val_; }

QueryMemoryDescriptor::max_val_

int64_t max_val_

Definition: QueryMemoryDescriptor.h:406

Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getMinVal ( ) const

inline

Definition at line 264 of file QueryMemoryDescriptor.h.

References min_val_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

264 { return min_val_; }

QueryMemoryDescriptor::min_val_

int64_t min_val_

Definition: QueryMemoryDescriptor.h:404

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytes	(	const int8_t *	col_ptr,
		const size_t	bin,
		const size_t	col_idx
	)		const

Definition at line 996 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, entry_count_, getPaddedSlotWidthBytes(), getSlotCount(), getWarpCount(), group_col_widths_, and output_columnar_.

                                                                                {
   // Computes a byte offset to add to col_ptr for slot col_idx of entry bin. The
   // formula depends on whether the output is columnar and on whether col_idx is
   // the last slot.
   // NOTE(review): align_to_int64() is not shown on this page; presumably it rounds
   // its argument up to a multiple of 8 -- confirm.
   // In columnar mode the bin index must be inside the buffer.
   CHECK(!output_columnar_ || bin < entry_count_);
   size_t offset{0};
   auto warp_count = getWarpCount();
   const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
   const auto total_slot_count = getSlotCount();
   // Last slot: there is no following slot whose width could be consulted.
   if (col_idx + 1 == total_slot_count) {
     if (output_columnar_) {
       // Bytes occupied by the entries from bin to the end of this column.
       return (entry_count_ - bin) * chosen_bytes;
     } else {
       // Distance from col_ptr to the aligned address past this slot.
       return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
     }
   }
 
   const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
   if (output_columnar_) {
     // The columnar path requires exactly one group-by column and a warp count of 1.
     CHECK_EQ(size_t(1), group_col_widths_.size());
     CHECK_EQ(size_t(1), warp_count);
 
     // Aligned size of the whole current column ...
     offset = align_to_int64(entry_count_ * chosen_bytes);
 
     // ... adjusted by the per-entry width difference between the next slot and
     // this one, scaled by the bin index.
     offset += bin * (next_chosen_bytes - chosen_bytes);
     return offset;
   }
 
   // Row-wise, not the last slot: alignment is applied only when the next slot is
   // sizeof(int64_t) bytes wide; otherwise slots are packed back to back.
   if (next_chosen_bytes == sizeof(int64_t)) {
     return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
   } else {
     return chosen_bytes;
   }
 }

Here is the call graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytesRowOnly	(	const int8_t *	col_ptr,
		const size_t	col_idx
	)		const

Definition at line 1030 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), getPaddedSlotWidthBytes(), and getSlotCount().

Referenced by QueryMemoryInitializer::initColumnsPerRow().

                                                                                       {
   // Row-wise-only variant of getNextColOffInBytes(): same last-slot and next-slot
   // rules, without any columnar handling and without a bin argument.
   const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
   const auto total_slot_count = getSlotCount();
   // Last slot: return the distance from col_ptr to the aligned address past it.
   if (col_idx + 1 == total_slot_count) {
     return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
   }
 
   const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
 
   // Alignment is applied only when the next slot is sizeof(int64_t) bytes wide;
   // otherwise the next slot starts right after this one.
   if (next_chosen_bytes == sizeof(int64_t)) {
     return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
   } else {
     return chosen_bytes;
   }
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPaddedColWidthForRange	(	const size_t	offset,
		const size_t	range
	)		const

inline

Definition at line 214 of file QueryMemoryDescriptor.h.

References getPaddedSlotWidthBytes().

Referenced by result_set::get_byteoff_of_slot(), and ResultSet::makeGeoTargetValue().

                                                                                   {
     // Sums the padded widths of the slots with indices in [offset, offset + range).
     // A range of zero yields 0. No bounds check is performed in this function.
     size_t ret = 0;
     for (size_t i = offset; i < offset + range; i++) {
       // The per-slot width is an int8_t; it is widened explicitly before adding.
       ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
     }
     return ret;
   }

Here is the call graph for this function:

Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getPaddedSlotBufferSize ( const size_t slot_idx ) const

Definition at line 944 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), checkSlotUsesFlatBufferFormat(), entry_count_, getFlatBufferSize(), and getPaddedSlotWidthBytes().

Referenced by advance_to_next_columnar_target_buff().

                                                                                     {
   // Returns the aligned buffer size for one slot, chosen between two cases.
   // Flat-buffer slots report their own total size, which is only aligned here.
   if (checkSlotUsesFlatBufferFormat(slot_idx)) {
     return align_to_int64(getFlatBufferSize(slot_idx));
   }
   // Otherwise the size is the padded per-entry width times the entry count,
   // passed through align_to_int64().
   int8_t column_width = getPaddedSlotWidthBytes(slot_idx);
   return align_to_int64(column_width * entry_count_);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes ( const size_t slot_idx ) const

Definition at line 1189 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::padded_size.

Referenced by advance_target_ptr_row_wise(), TargetExprCodegen::codegen(), anonymous_namespace{GpuSharedMemoryUtils.cpp}::codegen_smem_dest_slot_ptr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenOutputSlot(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), ResultSet::eachCellInColumn(), Executor::executePlanWithoutGroupBy(), result_set::get_width_for_slot(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getNextColOffInBytesRowOnly(), getPaddedColWidthForRange(), getPaddedSlotBufferSize(), ResultSet::getPaddedSlotWidthBytes(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnsPerRow(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSet::makeGeoTargetValue(), TargetExprCodegenBuilder::operator()(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneAggregateSlot(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

                                                                                          {
   // Reads the padded_size field of the slot info for slot_idx; the logical
   // counterpart is exposed by getLogicalSlotWidthBytes().
   return col_slot_context_.getSlotInfo(slot_idx).padded_size;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes ( ) const

Definition at line 974 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by getColOffInBytes().

                                                                          {
   // Total size of all group-by key columns; only valid for columnar output.
   CHECK(output_columnar_);
   size_t buffer_size{0};
   for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
     // Each key column is sized with at least sizeof(int64_t) bytes per entry,
     // regardless of a narrower declared width, and is aligned individually.
     buffer_size += align_to_int64(
         std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
         getEntryCount());
   }
   return buffer_size;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes ( const size_t group_idx ) const

Definition at line 956 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by ResultSetStorage::copyKeyColWise(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

                                   {
   // Byte offset of key column group_idx: the sum of the aligned sizes of all key
   // columns that precede it. Only valid for columnar output and for an index
   // below the group-by column count. Index 0 yields offset 0.
   CHECK(output_columnar_);
   CHECK(group_idx < getGroupbyColCount());
   size_t offset{0};
   for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
     // TODO(Saman): relax that int64_bit part immediately
     // Same per-column sizing as getPrependedGroupBufferSizeInBytes(): at least
     // sizeof(int64_t) bytes per entry, aligned per column.
     offset += align_to_int64(
         std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
         getEntryCount());
   }
   return offset;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

QueryDescriptionType QueryMemoryDescriptor::getQueryDescriptionType ( ) const

inline

Definition at line 173 of file QueryMemoryDescriptor.h.

References query_desc_type_.

Referenced by ResultSetStorage::binSearchRowCount(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GpuReductionHelperJIT::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), Executor::collectAllDeviceResults(), copy_projection_buffer_from_gpu_columnar(), Executor::createKernels(), ResultSet::getQueryDescriptionType(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), isSingleColumnGroupByWithPerfectHash(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), TargetExprCodegenBuilder::operator()(), ResultSetStorage::reduce(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx(), ExecutionKernel::run(), ExecutionKernel::runImpl(), target_exprs_to_infos(), and ResultSet::updateStorageEntryCount().

173 { return query_desc_type_; }

QueryMemoryDescriptor::query_desc_type_

QueryDescriptionType query_desc_type_

Definition: QueryMemoryDescriptor.h:391

Here is the caller graph for this function:

std::unique_ptr< QueryExecutionContext > QueryMemoryDescriptor::getQueryExecutionContext	(	const RelAlgExecutionUnit &	ra_exe_unit,
		const Executor *	executor,
		const ExecutorDeviceType	device_type,
		const ExecutorDispatchMode	dispatch_mode,
		const int	device_id,
		const shared::TableKey &	outer_table_key,
		const int64_t	num_rows,
		const std::vector< std::vector< const int8_t * >> &	col_buffers,
		const std::vector< std::vector< uint64_t >> &	frag_offsets,
		std::shared_ptr< RowSetMemoryOwner >	row_set_mem_owner,
		const bool	output_columnar,
		const bool	sort_on_gpu,
		const size_t	thread_idx,
		RenderInfo *	render_info
	)		const

Definition at line 698 of file QueryMemoryDescriptor.cpp.

References DEBUG_TIMER, and QueryExecutionContext.

Referenced by ExecutionKernel::runImpl().

                                    {
   // Factory for a QueryExecutionContext bound to this descriptor. Callers must
   // handle a null result: nothing is created when frag_offsets is empty.
   // NOTE(review): DEBUG_TIMER presumably records the time spent in this scope
   // under the function name -- confirm against its definition.
   auto timer = DEBUG_TIMER(__func__);
   if (frag_offsets.empty()) {
     return nullptr;
   }
   // All arguments are forwarded in order; *this is inserted as the second
   // constructor argument.
   return std::unique_ptr<QueryExecutionContext>(
       new QueryExecutionContext(ra_exe_unit,
                                 *this,
                                 executor,
                                 device_type,
                                 dispatch_mode,
                                 device_id,
                                 outer_table_key,
                                 num_rows,
                                 col_buffers,
                                 frag_offsets,
                                 row_set_mem_owner,
                                 output_columnar,
                                 sort_on_gpu,
                                 thread_idx,
                                 render_info));
 }

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowSize ( ) const

Definition at line 835 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getColsSize(), getEffectiveKeyWidth(), group_col_widths_, heavyai::GroupByPerfectHash, keyless_hash_, output_columnar_, and query_desc_type_.

                                                  {
   // Size in bytes of one row: aligned key bytes (if any) plus the slot bytes from
   // getColsSize(), with the sum aligned again. Only valid for row-wise output.
   CHECK(!output_columnar_);
   size_t total_bytes{0};
   if (keyless_hash_) {
     // ignore, there's no group column in the output buffer
     CHECK(query_desc_type_ == QueryDescriptionType::GroupByPerfectHash);
   } else {
     // Every key uses the same effective width; the key section is aligned
     // before the slots are appended.
     total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
     total_bytes = align_to_int64(total_bytes);
   }
   total_bytes += getColsSize();
   return align_to_int64(total_bytes);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowWidth ( ) const

Definition at line 1214 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsPaddedSize().

Referenced by get_row_bytes().

                                                   {
   // Returns the padded size of all slots as reported by the slot context. Unlike
   // getRowSize(), no key bytes are added and align_to_int64() is not applied here.
   // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
   return col_slot_context_.getAllSlotsPaddedSize();
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getSlotCount ( ) const

Definition at line 1185 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getSlotCount().

                                                    {
   // Thin forwarder: the number of slots is owned by the slot context.
   return col_slot_context_.getSlotCount();
 }

Here is the call graph for this function:

Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol ( const size_t col_idx ) const

Definition at line 1203 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, col_slot_context_, and ColSlotContext::getSlotsForCol().

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateModeBuffer(), QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::calculateCountDistinctBufferSize(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), QueryMemoryInitializer::initializeModeIndexSet(), and QueryMemoryInitializer::initializeQuantileParams().

                                 {
   // Maps a column index to its slot index for columns that own exactly one slot;
   // any other slot count for col_idx fails the check below.
   const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
   CHECK_EQ(col_slots.size(), size_t(1));
   // NOTE(review): the element is converted to the int8_t return type on return;
   // the element type of col_slots is not visible on this page.
   return col_slots.front();
 }

Here is the call graph for this function:

Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getTargetGroupbyIndex ( const size_t target_idx ) const

inline

Definition at line 243 of file QueryMemoryDescriptor.h.

References CHECK_LT, and target_groupby_indices_.

Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().

                                                                {
     // Bounds-checked lookup into target_groupby_indices_ for the given target.
     // NOTE(review): the meaning of the stored value (for example a negative
     // sentinel) is not visible on this page -- check the callers.
     CHECK_LT(target_idx, target_groupby_indices_.size());
     return target_groupby_indices_[target_idx];
   }

Here is the caller graph for this function:

int32_t QueryMemoryDescriptor::getTargetIdxForKey ( ) const

inline

Definition at line 186 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

Referenced by ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSetStorage::reduceSingleRow(), and reductionKey().

186 { return idx_target_as_key_; }

QueryMemoryDescriptor::idx_target_as_key_

int32_t idx_target_as_key_

Definition: QueryMemoryDescriptor.h:394

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( ) const

private

Returns the maximum total number of bytes (including required paddings) to store all non-lazy columns' results for columnar cases.

Definition at line 862 of file QueryMemoryDescriptor.cpp.

References CHECK, col_slot_context_, entry_count_, ColSlotContext::getTotalBytesOfColumnarBuffers(), and output_columnar_.

Referenced by getBufferSizeBytes(), and getTotalBytesOfColumnarProjections().

                                                                      {
   // Only valid for columnar output. The size is computed by the slot context for
   // the full entry_count_ of this descriptor.
   CHECK(output_columnar_);
   return col_slot_context_.getTotalBytesOfColumnarBuffers(entry_count_);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( const size_t num_entries_per_column ) const

private

This is a helper function that returns the total number of bytes (including required paddings) to store all non-lazy columns' results for columnar cases.

Definition at line 871 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getTotalBytesOfColumnarBuffers().

                                                {
   // Thin forwarder taking an explicit entry count. Unlike the no-argument
   // overload, this one does not check output_columnar_.
   return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
 }

Here is the call graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections ( const size_t projection_count ) const

private

Returns the effective total number of bytes from columnar projections, which includes: 1) the total number of bytes used to store all non-lazy columns, and 2) the total number of bytes used to store row indices (for lazy fetches, etc.).

NOTE: this function does not represent the buffer sizes dedicated for the results, but the required memory to fill all valid results into a compact new buffer (with no holes in it)

Definition at line 885 of file QueryMemoryDescriptor.cpp.

References getTotalBytesOfColumnarBuffers().

                                          {
   // One row index of sizeof(int64_t) bytes is accounted for per projected row, in
   // addition to the columnar buffers sized for projection_count entries.
   constexpr size_t row_index_width = sizeof(int64_t);
   return getTotalBytesOfColumnarBuffers(projection_count) +
          row_index_width * projection_count;
 }

Here is the call graph for this function:

size_t QueryMemoryDescriptor::getWarpCount ( ) const

Definition at line 849 of file QueryMemoryDescriptor.cpp.

References executor_, and interleaved_bins_on_gpu_.

Referenced by getColOffInBytes(), getColOffInBytesInNextBin(), and getNextColOffInBytes().

                                                    {
   // Returns the warp size reported by the executor when interleaved bins are
   // enabled, and 1 otherwise. executor_ is dereferenced only in the interleaved
   // case, so it must be non-null whenever interleaved_bins_on_gpu_ is set.
   return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
 }

Here is the caller graph for this function:

int8_t QueryMemoryDescriptor::groupColWidth ( const size_t key_idx ) const

inline

Definition at line 189 of file QueryMemoryDescriptor.h.

References CHECK_LT, and group_col_widths_.

Referenced by ResultSetStorage::copyKeyColWise(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

                                                    {
     // Bounds-checked accessor for the width stored for group-by key key_idx;
     // the value is returned by copy as an int8_t.
     CHECK_LT(key_idx, group_col_widths_.size());
     return group_col_widths_[key_idx];
   }

Here is the caller graph for this function:

const auto QueryMemoryDescriptor::groupColWidthsBegin ( ) const

inline

Definition at line 196 of file QueryMemoryDescriptor.h.

References group_col_widths_.

196 { return group_col_widths_.begin(); }

QueryMemoryDescriptor::group_col_widths_

std::vector< int8_t > group_col_widths_

Definition: QueryMemoryDescriptor.h:398

const auto QueryMemoryDescriptor::groupColWidthsEnd ( ) const

inline

Definition at line 197 of file QueryMemoryDescriptor.h.

References group_col_widths_.

197 { return group_col_widths_.end(); }

QueryMemoryDescriptor::group_col_widths_

std::vector< int8_t > group_col_widths_

Definition: QueryMemoryDescriptor.h:398

bool QueryMemoryDescriptor::hasInterleavedBinsOnGpu ( ) const

inline

Definition at line 183 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

183 { return interleaved_bins_on_gpu_; }

QueryMemoryDescriptor::interleaved_bins_on_gpu_

bool interleaved_bins_on_gpu_

Definition: QueryMemoryDescriptor.h:393

bool QueryMemoryDescriptor::hasKeylessHash ( ) const

inline

Definition at line 180 of file QueryMemoryDescriptor.h.

References keyless_hash_.

Referenced by GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), get_key_bytes_rowwise(), ResultSet::getTargetValueFromBufferRowwise(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), QueryMemoryInitializer::initColumnarGroups(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), QueryMemoryInitializer::initRowGroups(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), and ResultSetStorage::reduceSingleRow().

180 { return keyless_hash_; }

QueryMemoryDescriptor::keyless_hash_

bool keyless_hash_

Definition: QueryMemoryDescriptor.h:392

Here is the caller graph for this function:

bool QueryMemoryDescriptor::hasNulls ( ) const

inline

Definition at line 268 of file QueryMemoryDescriptor.h.

References has_nulls_.

Referenced by GroupByAndAggregate::codegenGroupBy().

268 { return has_nulls_; }

QueryMemoryDescriptor::has_nulls_

bool has_nulls_

Definition: QueryMemoryDescriptor.h:408

Here is the caller graph for this function:

bool QueryMemoryDescriptor::hasVarlenOutput ( ) const

inline

Definition at line 358 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::hasVarlenOutput().

358 { return col_slot_context_.hasVarlenOutput(); }

QueryMemoryDescriptor::col_slot_context_

ColSlotContext col_slot_context_

Definition: QueryMemoryDescriptor.h:419

ColSlotContext::hasVarlenOutput

bool hasVarlenOutput() const

Definition: ColSlotContext.h:120

Here is the call graph for this function:

Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptor::init	(	const Executor *	executor,
		const RelAlgExecutionUnit &	ra_exe_unit,
		const std::vector< InputTableInfo > &	query_infos,
		const ColRangeInfo &	col_range_info,
		const KeylessInfo &	keyless_info,
		const bool	allow_multifrag,
		const ExecutorDeviceType	device_type,
		const int8_t	crt_min_byte_width,
		const bool	sort_on_gpu_hint,
		const size_t	shard_count,
		const size_t	max_groups_buffer_entry_count,
		RenderInfo *	render_info,
		const ApproxQuantileDescriptors &	approx_quantile_descriptors,
		const CountDistinctDescriptors	count_distinct_descriptors,
		const bool	must_use_baseline_sort,
		const bool	output_columnar_hint,
		const bool	streaming_top_n_hint,
		const bool	threads_can_reuse_group_by_buffers
	)

static

Definition at line 240 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

                                                    {
   // Factory for a QueryMemoryDescriptor describing the output buffer layout of
   // ra_exe_unit. Queries without group-by expressions return early with a
   // single-entry descriptor; grouped queries derive the entry count, the
   // keyless / interleaved flags and the slot context from
   // col_range_info.hash_type_ in the switch below.
   auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs);
   const bool is_group_by{!group_col_widths.empty()};
 
   auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
 
   const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
       ra_exe_unit, query_infos, crt_min_byte_width);
 
   // Start with every slot padded to the same width; some branches below rebuild
   // col_slot_context from scratch.
   col_slot_context.setAllSlotsPaddedSize(min_slot_size);
   col_slot_context.validate();
 
   if (!is_group_by) {
     CHECK(!must_use_baseline_sort);
 
     // No group-by expressions: the query type is Estimator when
     // ra_exe_unit.estimator is set, NonGroupedAggregate otherwise. Keyless hash,
     // interleaved bins, GPU sort and streaming top-n are all disabled.
     return std::make_unique<QueryMemoryDescriptor>(
         executor,
         ra_exe_unit,
         query_infos,
         allow_multifrag,
         false,
         false,
         -1,
         ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
                                            : QueryDescriptionType::NonGroupedAggregate,
                      0,
                      0,
                      0,
                      false},
         col_slot_context,
         std::vector<int8_t>{},
         /*group_col_compact_width=*/0,
         std::vector<int64_t>{},
         /*entry_count=*/1,
         approx_quantile_descriptors,
         count_distinct_descriptors,
         false,
         output_columnar_hint,
         render_info && render_info->isInSitu(),
         must_use_baseline_sort,
         /*use_streaming_top_n=*/false,
         threads_can_reuse_group_by_buffers);
   }
 
   // Defaults for the grouped case; each hash type below overrides what it needs.
   size_t entry_count = 1;
   auto actual_col_range_info = col_range_info;
   bool interleaved_bins_on_gpu = false;
   bool keyless_hash = false;
   bool streaming_top_n = false;
   int8_t group_col_compact_width = 0;
   int32_t idx_target_as_key = -1;
   auto output_columnar = output_columnar_hint;
   std::vector<int64_t> target_groupby_indices;
 
   switch (col_range_info.hash_type_) {
     case QueryDescriptionType::GroupByPerfectHash: {
       if (render_info) {
         // TODO(croot): this can be removed now thanks to the more centralized
         // NonInsituQueryClassifier code, but keeping it just in case
         render_info->setNonInSitu();
       }
       // keyless hash: whether or not group columns are stored at the beginning of the
       // output buffer
       keyless_hash =
           (!sort_on_gpu_hint ||
            !QueryMemoryDescriptor::many_entries(
                col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
           !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
 
       // if keyless, then this target index indicates whether an entry is empty or not
       // (acts as a key)
       idx_target_as_key = keyless_info.target_index;
 
       if (group_col_widths.size() > 1) {
         // col range info max contains the expected cardinality of the output
         entry_count = static_cast<size_t>(actual_col_range_info.max);
         actual_col_range_info.bucket = 0;
       } else {
         // single column perfect hash
         entry_count = std::max(
             GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
         // Interleaved bins are only considered up to this many entries.
         const size_t interleaved_max_threshold{512};
 
         if (must_use_baseline_sort) {
           target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
                                                                 ra_exe_unit.target_exprs);
           col_slot_context =
               ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
         }
 
         // A SAMPLE aggregate with a variable-length type rules out interleaved bins.
         bool has_varlen_sample_agg = false;
         for (const auto& target_expr : ra_exe_unit.target_exprs) {
           if (target_expr->get_contains_agg()) {
             const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
             CHECK(agg_expr);
             if (agg_expr->get_aggtype() == kSAMPLE &&
                 agg_expr->get_type_info().is_varlen()) {
               has_varlen_sample_agg = true;
               break;
             }
           }
         }
 
         interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
                                   (entry_count <= interleaved_max_threshold) &&
                                   (device_type == ExecutorDeviceType::GPU) &&
                                   QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
                                       count_distinct_descriptors) &&
                                   !output_columnar;
       }
       break;
     }
     case QueryDescriptionType::GroupByBaselineHash: {
       if (render_info) {
         // TODO(croot): this can be removed now thanks to the more centralized
         // NonInsituQueryClassifier code, but keeping it just in case
         render_info->setNonInSitu();
       }
       // With shards, split the entry budget across shards, rounding up.
       entry_count = shard_count
                         ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
                         : max_groups_buffer_entry_count;
       target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
                                                             ra_exe_unit.target_exprs);
       col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
 
       // Columnar output always uses 8-byte keys; row-wise output picks the width
       // via pick_baseline_key_width().
       group_col_compact_width =
           output_columnar ? 8
                           : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
 
       // The incoming range values are discarded for baseline hash.
       actual_col_range_info =
           ColRangeInfo{QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
       break;
     }
     case QueryDescriptionType::Projection: {
       CHECK(!must_use_baseline_sort);
 
       if (streaming_top_n_hint && use_streaming_top_n(ra_exe_unit, output_columnar)) {
         // Streaming top-n keeps offset + limit entries (limit counts as 0 when unset).
         streaming_top_n = true;
         entry_count =
             ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit.value_or(0);
       } else {
         if (ra_exe_unit.use_bump_allocator) {
           // The bump allocator path forces row-wise output and a zero entry count.
           output_columnar = false;
           entry_count = 0;
         } else {
           entry_count = ra_exe_unit.scan_limit
                             ? static_cast<size_t>(ra_exe_unit.scan_limit)
                             : max_groups_buffer_entry_count;
         }
       }
 
       // Projection indices are only computed when lazy fetch is allowed.
       target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
                                    ? target_expr_proj_indices(ra_exe_unit)
                                    : std::vector<int64_t>{};
 
       col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
       break;
     }
     default:
       UNREACHABLE() << "Unknown query type";
   }
 
   return std::make_unique<QueryMemoryDescriptor>(executor,
                                                  ra_exe_unit,
                                                  query_infos,
                                                  allow_multifrag,
                                                  keyless_hash,
                                                  interleaved_bins_on_gpu,
                                                  idx_target_as_key,
                                                  actual_col_range_info,
                                                  col_slot_context,
                                                  group_col_widths,
                                                  group_col_compact_width,
                                                  target_groupby_indices,
                                                  entry_count,
                                                  approx_quantile_descriptors,
                                                  count_distinct_descriptors,
                                                  sort_on_gpu_hint,
                                                  output_columnar,
                                                  render_info && render_info->isInSitu(),
                                                  must_use_baseline_sort,
                                                  streaming_top_n,
                                                  threads_can_reuse_group_by_buffers);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

bool QueryMemoryDescriptor::interleavedBins ( const ExecutorDeviceType device_type ) const

Definition at line 1168 of file QueryMemoryDescriptor.cpp.

References GPU, and interleaved_bins_on_gpu_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), QueryExecutionContext::groupBufferToResults(), QueryMemoryInitializer::initGroupByBuffer(), and QueryMemoryInitializer::QueryMemoryInitializer().

                                                                                       {
   return interleaved_bins_on_gpu_ && device_type == ExecutorDeviceType::GPU;
 }

Here is the caller graph for this function:

bool QueryMemoryDescriptor::isGroupBy ( ) const

inline

Definition at line 200 of file QueryMemoryDescriptor.h.

References group_col_widths_.

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), init_agg_val_vec(), QueryMemoryInitializer::initColumnsPerRow(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

200 { return !group_col_widths_.empty(); }

QueryMemoryDescriptor::group_col_widths_

std::vector< int8_t > group_col_widths_

Definition: QueryMemoryDescriptor.h:398

Here is the caller graph for this function:

bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed ( ) const

Definition at line 1116 of file QueryMemoryDescriptor.cpp.

References g_cluster, output_columnar_, heavyai::Projection, query_desc_type_, and heavyai::TableFunction.

Referenced by TargetExprCodegen::codegenAggregate(), TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(), TargetExprCodegenBuilder::codegenSlotEmptyKey(), init_agg_val_vec(), ResultSet::makeTargetValue(), QueryMemoryDescriptor(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceOneSlotSingleValue(), and setOutputColumnar().

                                                                {
   // In distributed mode, result sets are serialized using rowwise iterators, so we use
   // consistent slot widths for now
   return output_columnar_ && !g_cluster &&
          (query_desc_type_ == QueryDescriptionType::Projection ||
           query_desc_type_ == QueryDescriptionType::TableFunction);
 }

Here is the caller graph for this function:

bool QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash ( ) const

inline

Definition at line 175 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), getQueryDescriptionType(), and heavyai::GroupByPerfectHash.

Referenced by GroupByAndAggregate::codegenGroupBy(), and ResultSet::getTargetValueFromBufferRowwise().

                                                     {
     return getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash &&
            getGroupbyColCount() == 1;
   }

Here is the call graph for this function:

Here is the caller graph for this function:

bool QueryMemoryDescriptor::isWarpSyncRequired ( const ExecutorDeviceType device_type ) const

Definition at line 1173 of file QueryMemoryDescriptor.cpp.

References executor_, and GPU.

Referenced by query_group_by_template().

                                                 {
   if (device_type == ExecutorDeviceType::GPU) {
     return executor_->cudaMgr()->isArchVoltaOrGreaterForAll();
   }
   return false;
 }

Here is the caller graph for this function:

bool QueryMemoryDescriptor::lazyInitGroups ( const ExecutorDeviceType device_type ) const

Definition at line 1163 of file QueryMemoryDescriptor.cpp.

References count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, and render_output_.

Referenced by create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().

                                                                                      {
   return device_type == ExecutorDeviceType::GPU && !render_output_ &&
          countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

static bool QueryMemoryDescriptor::many_entries	(	const int64_t	max_val,
		const int64_t	min_val,
		const int64_t	bucket
	)

inline static

Definition at line 147 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory().

                                                  {
     return max_val - min_val > 10000 * std::max(bucket, int64_t(1));
   }

Here is the caller graph for this function:

bool QueryMemoryDescriptor::mustUseBaselineSort ( ) const

inline

Definition at line 292 of file QueryMemoryDescriptor.h.

References must_use_baseline_sort_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

292 { return must_use_baseline_sort_; }

QueryMemoryDescriptor::must_use_baseline_sort_

bool must_use_baseline_sort_

Definition: QueryMemoryDescriptor.h:414

Here is the caller graph for this function:

bool QueryMemoryDescriptor::operator== ( const QueryMemoryDescriptor & other ) const

Definition at line 631 of file QueryMemoryDescriptor.cpp.

References bucket_, col_slot_context_, count_distinct_descriptors_, force_4byte_float_, group_col_compact_width_, group_col_widths_, has_nulls_, idx_target_as_key_, interleaved_bins_on_gpu_, keyless_hash_, max_val_, min_val_, output_columnar_, query_desc_type_, sort_on_gpu_, target_groupby_indices_, and threads_can_reuse_group_by_buffers_.

                                                                                {
   // Note that this method does not check ptr reference members (e.g. executor_) or
   // entry_count_
   if (query_desc_type_ != other.query_desc_type_) {
     return false;
   }
   if (keyless_hash_ != other.keyless_hash_) {
     return false;
   }
   if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
     return false;
   }
   if (idx_target_as_key_ != other.idx_target_as_key_) {
     return false;
   }
   if (force_4byte_float_ != other.force_4byte_float_) {
     return false;
   }
   if (group_col_widths_ != other.group_col_widths_) {
     return false;
   }
   if (group_col_compact_width_ != other.group_col_compact_width_) {
     return false;
   }
   if (target_groupby_indices_ != other.target_groupby_indices_) {
     return false;
   }
   if (min_val_ != other.min_val_) {
     return false;
   }
   if (max_val_ != other.max_val_) {
     return false;
   }
   if (bucket_ != other.bucket_) {
     return false;
   }
   if (has_nulls_ != other.has_nulls_) {
     return false;
   }
   if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
     return false;
   } else {
     // Count distinct descriptors can legitimately differ in device only.
     for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
       auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
       auto count_distinct_desc = count_distinct_descriptors_[i];
       count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
       if (ref_count_distinct_desc != count_distinct_desc) {
         return false;
       }
     }
   }
   if (sort_on_gpu_ != other.sort_on_gpu_) {
     return false;
   }
   if (output_columnar_ != other.output_columnar_) {
     return false;
   }
   if (col_slot_context_ != other.col_slot_context_) {
     return false;
   }
   if (threads_can_reuse_group_by_buffers_ != other.threads_can_reuse_group_by_buffers_) {
     return false;
   }
   return true;
 }

int8_t QueryMemoryDescriptor::pick_target_compact_width	(	const RelAlgExecutionUnit &	ra_exe_unit,
		const std::vector< InputTableInfo > &	query_infos,
		const int8_t	crt_min_byte_width
	)

static

Definition at line 735 of file QueryMemoryDescriptor.cpp.

References CHECK, CHECK_EQ, g_bigint_count, anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), Analyzer::UOper::get_operand(), Analyzer::Expr::get_type_info(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, anonymous_namespace{QueryMemoryDescriptor.cpp}::is_int_and_no_bigger_than(), kCOUNT, kENCODING_DICT, kUNNEST, and RelAlgExecutionUnit::target_exprs.

                                      {
   if (g_bigint_count) {
     return sizeof(int64_t);
   }
   int8_t compact_width{0};
   auto col_it = ra_exe_unit.input_col_descs.begin();
   auto const end = ra_exe_unit.input_col_descs.end();
   int unnest_array_col_id{std::numeric_limits<int>::min()};
   for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
     const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
     if (uoper && uoper->get_optype() == kUNNEST) {
       const auto& arg_ti = uoper->get_operand()->get_type_info();
       CHECK(arg_ti.is_array());
       const auto& elem_ti = arg_ti.get_elem_type();
       if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
         unnest_array_col_id = (*col_it)->getColId();
       } else {
         compact_width = crt_min_byte_width;
         break;
       }
     }
     if (col_it != end) {
       ++col_it;
     }
   }
   if (!compact_width &&
       (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
     compact_width = crt_min_byte_width;
   }
   if (!compact_width) {
     col_it = ra_exe_unit.input_col_descs.begin();
     std::advance(col_it, ra_exe_unit.groupby_exprs.size());
     for (const auto target : ra_exe_unit.target_exprs) {
       const auto& ti = target->get_type_info();
       const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
       if (agg && agg->get_arg()) {
         compact_width = crt_min_byte_width;
         break;
       }
 
       if (agg) {
         CHECK_EQ(kCOUNT, agg->get_aggtype());
         CHECK(!agg->get_is_distinct());
         if (col_it != end) {
           ++col_it;
         }
         continue;
       }
 
       if (is_int_and_no_bigger_than(ti, 4) ||
           (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
         if (col_it != end) {
           ++col_it;
         }
         continue;
       }
 
       const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
       if (uoper && uoper->get_optype() == kUNNEST &&
           (*col_it)->getColId() == unnest_array_col_id) {
         const auto arg_ti = uoper->get_operand()->get_type_info();
         CHECK(arg_ti.is_array());
         const auto& elem_ti = arg_ti.get_elem_type();
         if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
           if (col_it != end) {
             ++col_it;
           }
           continue;
         }
       }
 
       compact_width = crt_min_byte_width;
       break;
     }
   }
   if (!compact_width) {
     size_t total_tuples{0};
     for (const auto& qi : query_infos) {
       total_tuples += qi.info.getNumTuples();
     }
     return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
                    unnest_array_col_id != std::numeric_limits<int>::min()
                ? 4
                : crt_min_byte_width;
   } else {
     // TODO(miyu): relax this condition to allow more cases just w/o padding
     for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs)) {
       compact_width = std::max(compact_width, wid);
     }
     return compact_width;
   }
 }

Here is the call graph for this function:

std::string QueryMemoryDescriptor::queryDescTypeToString ( ) const

Definition at line 1247 of file QueryMemoryDescriptor.cpp.

References heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, heavyai::NonGroupedAggregate, heavyai::Projection, query_desc_type_, heavyai::TableFunction, and UNREACHABLE.

Referenced by reductionKey().

                                                              {
   switch (query_desc_type_) {
     case QueryDescriptionType::GroupByPerfectHash:
       return "Perfect Hash";
     case QueryDescriptionType::GroupByBaselineHash:
       return "Baseline Hash";
     case QueryDescriptionType::Projection:
       return "Projection";
     case QueryDescriptionType::TableFunction:
       return "Table Function";
     case QueryDescriptionType::NonGroupedAggregate:
       return "Non-grouped Aggregate";
     case QueryDescriptionType::Estimator:
       return "Estimator";
     default:
       UNREACHABLE();
   }
   return "";
 }

Here is the caller graph for this function:

std::string QueryMemoryDescriptor::reductionKey ( ) const

Definition at line 1293 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, getEffectiveKeyWidth(), getGroupbyColCount(), getTargetGroupbyIndex(), getTargetIdxForKey(), join(), keyless_hash_, queryDescTypeToString(), targetGroupbyIndicesSize(), to_string(), ColSlotContext::toString(), and toString().

Referenced by ResultSetReductionJIT::cacheKey(), and toString().

                                                     {
   std::string str;
   str += "Query Memory Descriptor State\n";
   str += "\tQuery Type: " + queryDescTypeToString() + "\n";
   str +=
       "\tKeyless Hash: " + ::toString(keyless_hash_) +
       (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
                      : "") +
       "\n";
   str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
   str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
   const auto group_indices_size = targetGroupbyIndicesSize();
   if (group_indices_size) {
     std::vector<std::string> group_indices_strings;
     for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
       group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
     }
     str += "\tTarget group by indices: " +
            boost::algorithm::join(group_indices_strings, ",") + "\n";
   }
   str += "\t" + col_slot_context_.toString();
   return str;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

void QueryMemoryDescriptor::resetGroupColWidths ( const std::vector< int8_t > & new_group_col_widths )

inline protected

Definition at line 384 of file QueryMemoryDescriptor.h.

References group_col_widths_.

                                                                             {
     // Replaces the stored group column widths with a copy of the given vector.
     group_col_widths_ = new_group_col_widths;
   }

void QueryMemoryDescriptor::setAllTargetGroupbyIndices ( std::vector< int64_t > group_by_indices )

inline

Definition at line 248 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

                                                                        {
     target_groupby_indices_ = group_by_indices;
   }

void QueryMemoryDescriptor::setAvailableCpuThreads ( size_t num_available_threads ) const

inline

Definition at line 374 of file QueryMemoryDescriptor.h.

References num_available_threads_.

Referenced by ExecutionKernel::runImpl().

                                                                   {
     // Records the given thread count in num_available_threads_.
     // NOTE(review): the method is declared const, so the member is presumably
     // declared `mutable` -- confirm in the header.
     num_available_threads_ = num_available_threads;
   }

Here is the caller graph for this function:

void QueryMemoryDescriptor::setEntryCount ( const size_t val )

inline

Definition at line 262 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by Executor::executePlanWithGroupBy(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::updateEntryCount(), and ResultSet::updateStorageEntryCount().

262 { entry_count_ = val; }

QueryMemoryDescriptor::entry_count_

size_t entry_count_

Definition: QueryMemoryDescriptor.h:403

Here is the caller graph for this function:

void QueryMemoryDescriptor::setForceFourByteFloat ( const bool val )

inline

Definition at line 305 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

305 { force_4byte_float_ = val; }

QueryMemoryDescriptor::force_4byte_float_

bool force_4byte_float_

Definition: QueryMemoryDescriptor.h:417

void QueryMemoryDescriptor::setGroupColCompactWidth ( const int8_t val )

inline

Definition at line 202 of file QueryMemoryDescriptor.h.

References group_col_compact_width_.

202 { group_col_compact_width_ = val; }

QueryMemoryDescriptor::group_col_compact_width_

int8_t group_col_compact_width_

Definition: QueryMemoryDescriptor.h:399

void QueryMemoryDescriptor::setHasInterleavedBinsOnGpu ( const bool val )

inline

Definition at line 184 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

184 { interleaved_bins_on_gpu_ = val; }

QueryMemoryDescriptor::interleaved_bins_on_gpu_

bool interleaved_bins_on_gpu_

Definition: QueryMemoryDescriptor.h:393

void QueryMemoryDescriptor::setHasKeylessHash ( const bool val )

inline

Definition at line 181 of file QueryMemoryDescriptor.h.

References keyless_hash_.

181 { keyless_hash_ = val; }

QueryMemoryDescriptor::keyless_hash_

bool keyless_hash_

Definition: QueryMemoryDescriptor.h:392

void QueryMemoryDescriptor::setOutputColumnar ( const bool val )

Definition at line 1105 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, isLogicalSizedColumnsAllowed(), output_columnar_, and ColSlotContext::setAllSlotsPaddedSizeToLogicalSize().

                                                             {
   output_columnar_ = val;
   if (isLogicalSizedColumnsAllowed()) {
     col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
   }
 }

Here is the call graph for this function:

void QueryMemoryDescriptor::setPaddedSlotWidthBytes	(	const size_t	slot_idx,
		const int8_t	bytes
	)

Definition at line 1193 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setPaddedSlotWidthBytes().

Referenced by TargetExprCodegenBuilder::operator()().

                                                                         {
   // Forwards to ColSlotContext::setPaddedSlotWidthBytes() for the given slot.
   col_slot_context_.setPaddedSlotWidthBytes(slot_idx, bytes);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

void QueryMemoryDescriptor::setQueryDescriptionType ( const QueryDescriptionType val )

inline

Definition at line 174 of file QueryMemoryDescriptor.h.

References query_desc_type_.

174 { query_desc_type_ = val; }

QueryMemoryDescriptor::query_desc_type_

QueryDescriptionType query_desc_type_

Definition: QueryMemoryDescriptor.h:391

void QueryMemoryDescriptor::setTargetIdxForKey ( const int32_t val )

inline

Definition at line 187 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

187 { idx_target_as_key_ = val; }

QueryMemoryDescriptor::idx_target_as_key_

int32_t idx_target_as_key_

Definition: QueryMemoryDescriptor.h:394

void QueryMemoryDescriptor::setThreadsCanReuseGroupByBuffers ( const bool val )

inline

Definition at line 298 of file QueryMemoryDescriptor.h.

References threads_can_reuse_group_by_buffers_.

                                                         {
     // Stores the flag later returned by threadsCanReuseGroupByBuffers().
     threads_can_reuse_group_by_buffers_ = val;
   }

bool QueryMemoryDescriptor::slotIsVarlenOutput ( const size_t slot_idx ) const

inline

Definition at line 368 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::slotIsVarlen().

Referenced by advance_target_ptr_row_wise(), and ResultSet::makeGeoTargetValue().

                                                        {
     // Delegates to ColSlotContext::slotIsVarlen() for the given slot index.
     return col_slot_context_.slotIsVarlen(slot_idx);
   }

Here is the call graph for this function:

Here is the caller graph for this function:

bool QueryMemoryDescriptor::sortOnGpu ( ) const

inline

Definition at line 282 of file QueryMemoryDescriptor.h.

References sort_on_gpu_.

Referenced by alignPaddedSlots(), QueryExecutionContext::launchGpuCode(), ExecutionKernel::runImpl(), and use_speculative_top_n().

282 { return sort_on_gpu_; }

QueryMemoryDescriptor::sort_on_gpu_

bool sort_on_gpu_

Definition: QueryMemoryDescriptor.h:411

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::targetGroupbyIndicesSize ( ) const

inline

Definition at line 252 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().

252 { return target_groupby_indices_.size(); }

QueryMemoryDescriptor::target_groupby_indices_

std::vector< int64_t > target_groupby_indices_

Definition: QueryMemoryDescriptor.h:402

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::targetGroupbyNegativeIndicesSize ( ) const

inline

Definition at line 253 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

                                                   {
     return std::count_if(
         target_groupby_indices_.begin(),
         target_groupby_indices_.end(),
         [](const int64_t& target_group_by_index) { return target_group_by_index < 0; });
   }

bool QueryMemoryDescriptor::threadsCanReuseGroupByBuffers ( ) const

inline

Definition at line 294 of file QueryMemoryDescriptor.h.

References threads_can_reuse_group_by_buffers_.

Referenced by Executor::launchKernelsViaResourceMgr(), QueryMemoryInitializer::QueryMemoryInitializer(), and Executor::reduceMultiDeviceResults().

                                              {
     // Returns the flag stored in threads_can_reuse_group_by_buffers_.
     return threads_can_reuse_group_by_buffers_;
   }

Here is the caller graph for this function:

bool QueryMemoryDescriptor::threadsShareMemory ( ) const

Definition at line 1140 of file QueryMemoryDescriptor.cpp.

References heavyai::NonGroupedAggregate, and query_desc_type_.

Referenced by canOutputColumnar(), anonymous_namespace{GpuMemUtils.cpp}::coalesced_size(), TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenGroupBy(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::getRowSet(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().

                                                      {
   return query_desc_type_ != QueryDescriptionType::NonGroupedAggregate;
 }

Here is the caller graph for this function:

std::string QueryMemoryDescriptor::toString ( ) const

Definition at line 1267 of file QueryMemoryDescriptor.cpp.

References allow_multifrag_, blocksShareMemory(), bucket_, entry_count_, executor_, g_enable_lazy_fetch, GPU, interleaved_bins_on_gpu_, lazyInitGroups(), max_val_, min_val_, must_use_baseline_sort_, output_columnar_, reductionKey(), render_output_, sort_on_gpu_, threadsShareMemory(), to_string(), use_streaming_top_n_, and usesGetGroupValueFast().

Referenced by Executor::createKernels(), and reductionKey().

                                                 {
   auto str = reductionKey();
   str += "\tAllow Multifrag: " + ::toString(allow_multifrag_) + "\n";
   str += "\tInterleaved Bins on GPU: " + ::toString(interleaved_bins_on_gpu_) + "\n";
   str += "\tBlocks Share Memory: " + ::toString(blocksShareMemory()) + "\n";
   str += "\tThreads Share Memory: " + ::toString(threadsShareMemory()) + "\n";
   str += "\tUses Fast Group Values: " + ::toString(usesGetGroupValueFast()) + "\n";
   str +=
       "\tLazy Init Groups (GPU): " + ::toString(lazyInitGroups(ExecutorDeviceType::GPU)) +
       "\n";
   str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
   str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
   str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
   str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
   str += "\tSort on GPU: " + ::toString(sort_on_gpu_) + "\n";
   str += "\tUse Streaming Top N: " + ::toString(use_streaming_top_n_) + "\n";
   str += "\tOutput Columnar: " + ::toString(output_columnar_) + "\n";
   auto const allow_lazy_fetch = executor_->plan_state_
                                     ? executor_->plan_state_->allow_lazy_fetch_
                                     : g_enable_lazy_fetch;
   str += "\tAllow Lazy Fetch: " + ::toString(allow_lazy_fetch) + "\n";
   str += "\tRender Output: " + ::toString(render_output_) + "\n";
   str += "\tUse Baseline Sort: " + ::toString(must_use_baseline_sort_) + "\n";
   return str;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

static TResultSetBufferDescriptor QueryMemoryDescriptor::toThrift ( const QueryMemoryDescriptor & )

static

int8_t QueryMemoryDescriptor::updateActualMinByteWidth ( const int8_t actual_min_byte_width ) const

Definition at line 1219 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getMinPaddedByteSize().

                                               {
   return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
 }

Here is the call graph for this function:

void QueryMemoryDescriptor::useConsistentSlotWidthSize ( const int8_t slot_width_size )

Definition at line 1210 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setAllSlotsSize().

                                                                                    {
   col_slot_context_.setAllSlotsSize(slot_width_size);
 }

Here is the call graph for this function:

bool QueryMemoryDescriptor::usesGetGroupValueFast ( ) const

Definition at line 1135 of file QueryMemoryDescriptor.cpp.

References getGroupbyColCount(), heavyai::GroupByPerfectHash, and query_desc_type_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), and toString().

                                                         {
   return (query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
           getGroupbyColCount() == 1);
 }

Here is the call graph for this function:

Here is the caller graph for this function:

bool QueryMemoryDescriptor::useStreamingTopN ( ) const

inline

Definition at line 288 of file QueryMemoryDescriptor.h.

References use_streaming_top_n_.

Referenced by GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenOutputSlot(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryMemoryInitializer::initGroupByBuffer(), QueryExecutionContext::launchCpuCode(), and QueryExecutionContext::launchGpuCode().

{ return use_streaming_top_n_; }  // source line 288

QueryMemoryDescriptor::use_streaming_top_n_

bool use_streaming_top_n_

Definition: QueryMemoryDescriptor.h:415

Here is the caller graph for this function:

std::optional< size_t > QueryMemoryDescriptor::varlenOutputBufferElemSize ( ) const

Definition at line 1339 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().

Referenced by TargetExprCodegen::codegenAggregate(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

                                                                             {
   int64_t buffer_element_size{0};
   for (size_t i = 0; i < col_slot_context_.getSlotCount(); i++) {
     try {
       const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
       if (slot_element_size < 0) {
         return std::nullopt;
       }
       buffer_element_size += slot_element_size;
     } catch (...) {
       continue;
     }
   }
   return buffer_element_size;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

size_t QueryMemoryDescriptor::varlenOutputRowSizeToSlot ( const size_t slot_idx ) const

Definition at line 1355 of file QueryMemoryDescriptor.cpp.

References CHECK_LT, col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().

Referenced by TargetExprCodegen::codegenAggregate().

                                                                                    {
   int64_t buffer_element_size{0};
   CHECK_LT(slot_idx, col_slot_context_.getSlotCount());
   for (size_t i = 0; i < slot_idx; i++) {
     try {
       const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
       if (slot_element_size < 0) {
         continue;
       }
       buffer_element_size += slot_element_size;
     } catch (...) {
       continue;
     }
   }
   return buffer_element_size;
 }

Here is the call graph for this function:

Here is the caller graph for this function:

Friends And Related Function Documentation

friend class QueryExecutionContext

friend

Definition at line 432 of file QueryMemoryDescriptor.h.

Referenced by getQueryExecutionContext().

friend class ResultSet

friend

Definition at line 431 of file QueryMemoryDescriptor.h.

Member Data Documentation

bool QueryMemoryDescriptor::allow_multifrag_

private

Definition at line 390 of file QueryMemoryDescriptor.h.

Referenced by toString().

ApproxQuantileDescriptors QueryMemoryDescriptor::approx_quantile_descriptors_

private

Definition at line 409 of file QueryMemoryDescriptor.h.

Referenced by getApproxQuantileDescriptors().

int64_t QueryMemoryDescriptor::bucket_

private

Definition at line 407 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getBucket(), operator==(), and toString().

ColSlotContext QueryMemoryDescriptor::col_slot_context_

private

CountDistinctDescriptors QueryMemoryDescriptor::count_distinct_descriptors_

private

Definition at line 410 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), getCountDistinctDescriptor(), getCountDistinctDescriptorsSize(), ResultSet::getTargetValueFromBufferRowwise(), lazyInitGroups(), operator==(), and QueryMemoryDescriptor().

size_t QueryMemoryDescriptor::entry_count_

private

Definition at line 403 of file QueryMemoryDescriptor.h.

Referenced by getBufferSizeBytes(), getColOffInBytes(), getEntryCount(), getNextColOffInBytes(), getPaddedSlotBufferSize(), getTotalBytesOfColumnarBuffers(), setEntryCount(), and toString().

const Executor* QueryMemoryDescriptor::executor_

private

Definition at line 389 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getBufferSizeBytes(), getExecutor(), getWarpCount(), isWarpSyncRequired(), QueryMemoryDescriptor(), and toString().

bool QueryMemoryDescriptor::force_4byte_float_

private

Definition at line 417 of file QueryMemoryDescriptor.h.

Referenced by forceFourByteFloat(), operator==(), and setForceFourByteFloat().

int8_t QueryMemoryDescriptor::group_col_compact_width_

private

Definition at line 399 of file QueryMemoryDescriptor.h.

Referenced by getEffectiveKeyWidth(), operator==(), and setGroupColCompactWidth().

std::vector<int8_t> QueryMemoryDescriptor::group_col_widths_

private

Definition at line 398 of file QueryMemoryDescriptor.h.

Referenced by clearGroupColWidths(), getBufferSizeBytes(), getColOffInBytes(), getColOffInBytesInNextBin(), getGroupbyColCount(), getNextColOffInBytes(), getRowSize(), groupColWidth(), groupColWidthsBegin(), groupColWidthsEnd(), isGroupBy(), operator==(), and resetGroupColWidths().

bool QueryMemoryDescriptor::has_nulls_

private

Definition at line 408 of file QueryMemoryDescriptor.h.

Referenced by hasNulls(), and operator==().

int32_t QueryMemoryDescriptor::idx_target_as_key_

private

Definition at line 394 of file QueryMemoryDescriptor.h.

Referenced by getTargetIdxForKey(), operator==(), and setTargetIdxForKey().

bool QueryMemoryDescriptor::interleaved_bins_on_gpu_

private

Definition at line 393 of file QueryMemoryDescriptor.h.

Referenced by getWarpCount(), hasInterleavedBinsOnGpu(), interleavedBins(), operator==(), setHasInterleavedBinsOnGpu(), and toString().

bool QueryMemoryDescriptor::keyless_hash_

private

Definition at line 392 of file QueryMemoryDescriptor.h.

Referenced by getBufferSizeBytes(), getColOffInBytes(), getKeyCount(), getRowSize(), hasKeylessHash(), operator==(), QueryMemoryDescriptor(), reductionKey(), and setHasKeylessHash().

int64_t QueryMemoryDescriptor::max_val_

private

Definition at line 406 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMaxVal(), operator==(), and toString().

int64_t QueryMemoryDescriptor::min_val_

private

Definition at line 404 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMinVal(), operator==(), and toString().

bool QueryMemoryDescriptor::must_use_baseline_sort_

private

Definition at line 414 of file QueryMemoryDescriptor.h.

Referenced by mustUseBaselineSort(), and toString().

size_t QueryMemoryDescriptor::num_available_threads_ {1}

mutable private

Definition at line 425 of file QueryMemoryDescriptor.h.

Referenced by getAvailableCpuThreads(), and setAvailableCpuThreads().

bool QueryMemoryDescriptor::output_columnar_

private

Definition at line 412 of file QueryMemoryDescriptor.h.

Referenced by didOutputColumnar(), getBufferSizeBytes(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), getRowSize(), getTotalBytesOfColumnarBuffers(), isLogicalSizedColumnsAllowed(), operator==(), QueryMemoryDescriptor(), setOutputColumnar(), and toString().

QueryDescriptionType QueryMemoryDescriptor::query_desc_type_

private

Definition at line 391 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), canUsePerDeviceCardinality(), getBufferSizeBytes(), getColOffInBytes(), getQueryDescriptionType(), getRowSize(), isLogicalSizedColumnsAllowed(), operator==(), queryDescTypeToString(), QueryMemoryDescriptor(), setQueryDescriptionType(), threadsShareMemory(), and usesGetGroupValueFast().

bool QueryMemoryDescriptor::render_output_

private

Definition at line 413 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), lazyInitGroups(), and toString().

bool QueryMemoryDescriptor::sort_on_gpu_

private

Definition at line 411 of file QueryMemoryDescriptor.h.

Referenced by operator==(), QueryMemoryDescriptor(), sortOnGpu(), and toString().

std::vector<int64_t> QueryMemoryDescriptor::target_groupby_indices_

private

Definition at line 402 of file QueryMemoryDescriptor.h.

Referenced by clearTargetGroupbyIndices(), getBufferColSlotCount(), getTargetGroupbyIndex(), operator==(), setAllTargetGroupbyIndices(), targetGroupbyIndicesSize(), and targetGroupbyNegativeIndicesSize().

bool QueryMemoryDescriptor::threads_can_reuse_group_by_buffers_

private

Definition at line 416 of file QueryMemoryDescriptor.h.

Referenced by operator==(), setThreadsCanReuseGroupByBuffers(), and threadsCanReuseGroupByBuffers().

bool QueryMemoryDescriptor::use_streaming_top_n_

private

Definition at line 415 of file QueryMemoryDescriptor.h.

Referenced by getBufferSizeBytes(), QueryMemoryDescriptor(), toString(), and useStreamingTopN().

The documentation for this class was generated from the following files:

/home/jenkins-slave/workspace/core-os-doxygen/QueryEngine/Descriptors/QueryMemoryDescriptor.h
/home/jenkins-slave/workspace/core-os-doxygen/QueryEngine/Descriptors/QueryMemoryDescriptor.cpp

Public Member Functions

Static Public Member Functions

Protected Member Functions

Private Member Functions

Private Attributes

Friends

Detailed Description

Constructor & Destructor Documentation

Member Function Documentation

Friends And Related Function Documentation

Member Data Documentation