OmniSciDB  a987f07e93
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryMemoryDescriptor Class Reference

#include <QueryMemoryDescriptor.h>

+ Collaboration diagram for QueryMemoryDescriptor:

Public Member Functions

 QueryMemoryDescriptor ()
 
 QueryMemoryDescriptor (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector< int8_t > &group_col_widths, const int8_t group_col_compact_width, const std::vector< int64_t > &target_groupby_indices, const size_t entry_count, const CountDistinctDescriptors count_distinct_descriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort, const bool use_streaming_top_n)
 
 QueryMemoryDescriptor (const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type, const bool is_table_function)
 
 QueryMemoryDescriptor (const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector< int8_t > &group_col_widths)
 
 QueryMemoryDescriptor (const TResultSetBufferDescriptor &thrift_query_memory_descriptor)
 
bool operator== (const QueryMemoryDescriptor &other) const
 
std::unique_ptr< QueryExecutionContext >
getQueryExecutionContext (const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const int outer_table_id, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *) const
 
bool countDistinctDescriptorsLogicallyEmpty () const
 
const Executor * getExecutor () const
 
QueryDescriptionType getQueryDescriptionType () const
 
void setQueryDescriptionType (const QueryDescriptionType val)
 
bool isSingleColumnGroupByWithPerfectHash () const
 
bool hasKeylessHash () const
 
void setHasKeylessHash (const bool val)
 
bool hasInterleavedBinsOnGpu () const
 
void setHasInterleavedBinsOnGpu (const bool val)
 
int32_t getTargetIdxForKey () const
 
void setTargetIdxForKey (const int32_t val)
 
int8_t groupColWidth (const size_t key_idx) const
 
size_t getPrependedGroupColOffInBytes (const size_t group_idx) const
 
size_t getPrependedGroupBufferSizeInBytes () const
 
const auto groupColWidthsBegin () const
 
const auto groupColWidthsEnd () const
 
void clearGroupColWidths ()
 
bool isGroupBy () const
 
void setGroupColCompactWidth (const int8_t val)
 
size_t getColCount () const
 
size_t getSlotCount () const
 
const int8_t getPaddedSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getLogicalSlotWidthBytes (const size_t slot_idx) const
 
void setPaddedSlotWidthBytes (const size_t slot_idx, const int8_t bytes)
 
const int8_t getSlotIndexForSingleSlotCol (const size_t col_idx) const
 
size_t getPaddedColWidthForRange (const size_t offset, const size_t range) const
 
void useConsistentSlotWidthSize (const int8_t slot_width_size)
 
size_t getRowWidth () const
 
int8_t updateActualMinByteWidth (const int8_t actual_min_byte_width) const
 
void addColSlotInfo (const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)
 
void addColSlotInfoFlatBuffer (const int64_t flatbuffer_size)
 
int64_t getFlatBufferSize (const size_t slot_idx) const
 
void clearSlotInfo ()
 
void alignPaddedSlots ()
 
int64_t getTargetGroupbyIndex (const size_t target_idx) const
 
void setAllTargetGroupbyIndices (std::vector< int64_t > group_by_indices)
 
size_t targetGroupbyIndicesSize () const
 
size_t targetGroupbyNegativeIndicesSize () const
 
void clearTargetGroupbyIndices ()
 
size_t getEntryCount () const
 
void setEntryCount (const size_t val)
 
int64_t getMinVal () const
 
int64_t getMaxVal () const
 
int64_t getBucket () const
 
bool hasNulls () const
 
const CountDistinctDescriptor & getCountDistinctDescriptor (const size_t idx) const
 
size_t getCountDistinctDescriptorsSize () const
 
bool sortOnGpu () const
 
bool canOutputColumnar () const
 
bool didOutputColumnar () const
 
void setOutputColumnar (const bool val)
 
bool useStreamingTopN () const
 
bool isLogicalSizedColumnsAllowed () const
 
bool mustUseBaselineSort () const
 
bool forceFourByteFloat () const
 
void setForceFourByteFloat (const bool val)
 
size_t getGroupbyColCount () const
 
size_t getKeyCount () const
 
size_t getBufferColSlotCount () const
 
size_t getBufferSizeBytes (const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type, const size_t override_entry_count) const
 
const ColSlotContext & getColSlotContext () const
 
bool usesGetGroupValueFast () const
 
bool blocksShareMemory () const
 
bool threadsShareMemory () const
 
bool lazyInitGroups (const ExecutorDeviceType) const
 
bool interleavedBins (const ExecutorDeviceType) const
 
size_t getColOffInBytes (const size_t col_idx) const
 
size_t getColOffInBytesInNextBin (const size_t col_idx) const
 
size_t getNextColOffInBytes (const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
 
size_t getNextColOffInBytesRowOnly (const int8_t *col_ptr, const size_t col_idx) const
 
size_t getColOnlyOffInBytes (const size_t col_idx) const
 
size_t getRowSize () const
 
size_t getColsSize () const
 
size_t getWarpCount () const
 
size_t getCompactByteWidth () const
 
size_t getEffectiveKeyWidth () const
 
bool isWarpSyncRequired (const ExecutorDeviceType) const
 
std::string queryDescTypeToString () const
 
std::string toString () const
 
std::string reductionKey () const
 
bool hasVarlenOutput () const
 
std::optional< size_t > varlenOutputBufferElemSize () const
 
size_t varlenOutputRowSizeToSlot (const size_t slot_idx) const
 
bool slotIsVarlenOutput (const size_t slot_idx) const
 

Static Public Member Functions

static TResultSetBufferDescriptor toThrift (const QueryMemoryDescriptor &)
 
static std::unique_ptr< QueryMemoryDescriptor >
init (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint)
 
static bool many_entries (const int64_t max_val, const int64_t min_val, const int64_t bucket)
 
static bool countDescriptorsLogicallyEmpty (const CountDistinctDescriptors &count_distinct_descriptors)
 
static int8_t pick_target_compact_width (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
 

Protected Member Functions

void resetGroupColWidths (const std::vector< int8_t > &new_group_col_widths)
 

Private Member Functions

size_t getTotalBytesOfColumnarBuffers () const
 
size_t getTotalBytesOfColumnarBuffers (const size_t num_entries_per_column) const
 
size_t getTotalBytesOfColumnarProjections (const size_t projection_count) const
 

Private Attributes

const Executor * executor_
 
bool allow_multifrag_
 
QueryDescriptionType query_desc_type_
 
bool keyless_hash_
 
bool interleaved_bins_on_gpu_
 
int32_t idx_target_as_key_
 
std::vector< int8_t > group_col_widths_
 
int8_t group_col_compact_width_
 
std::vector< int64_t > target_groupby_indices_
 
size_t entry_count_
 
int64_t min_val_
 
int64_t max_val_
 
int64_t bucket_
 
bool has_nulls_
 
CountDistinctDescriptors count_distinct_descriptors_
 
bool sort_on_gpu_
 
bool output_columnar_
 
bool render_output_
 
bool must_use_baseline_sort_
 
bool is_table_function_
 
bool use_streaming_top_n_
 
bool force_4byte_float_
 
ColSlotContext col_slot_context_
 

Friends

class ResultSet
 
class QueryExecutionContext
 

Detailed Description

Definition at line 66 of file QueryMemoryDescriptor.h.

Constructor & Destructor Documentation

QueryMemoryDescriptor::QueryMemoryDescriptor ( )

Definition at line 546 of file QueryMemoryDescriptor.cpp.

References Projection.

547  : executor_(nullptr)
548  , allow_multifrag_(false)
550  , keyless_hash_(false)
551  , interleaved_bins_on_gpu_(false)
552  , idx_target_as_key_(0)
554  , entry_count_(0)
555  , min_val_(0)
556  , max_val_(0)
557  , bucket_(0)
558  , has_nulls_(false)
559  , sort_on_gpu_(false)
560  , output_columnar_(false)
561  , render_output_(false)
562  , must_use_baseline_sort_(false)
563  , is_table_function_(false)
564  , use_streaming_top_n_(false)
565  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_
QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const bool  allow_multifrag,
const bool  keyless_hash,
const bool  interleaved_bins_on_gpu,
const int32_t  idx_target_as_key,
const ColRangeInfo &  col_range_info,
const ColSlotContext &  col_slot_context,
const std::vector< int8_t > &  group_col_widths,
const int8_t  group_col_compact_width,
const std::vector< int64_t > &  target_groupby_indices,
const size_t  entry_count,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  sort_on_gpu_hint,
const bool  output_columnar,
const bool  render_output,
const bool  must_use_baseline_sort,
const bool  use_streaming_top_n 
)

Definition at line 449 of file QueryMemoryDescriptor.cpp.

References canOutputColumnar(), CHECK, col_slot_context_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, streaming_top_n::get_heap_size(), getEntryCount(), getRowSize(), GroupByBaselineHash, GroupByPerfectHash, isLogicalSizedColumnsAllowed(), keyless_hash_, NonGroupedAggregate, output_columnar_, Projection, query_desc_type_, ColSlotContext::setAllSlotsPaddedSizeToLogicalSize(), ColSlotContext::setAllUnsetSlotsPaddedSize(), sort_on_gpu_, TableFunction, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::use_bump_allocator, use_streaming_top_n_, and ColSlotContext::validate().

469  : executor_(executor)
470  , allow_multifrag_(allow_multifrag)
471  , query_desc_type_(col_range_info.hash_type_)
472  , keyless_hash_(keyless_hash)
473  , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
474  , idx_target_as_key_(idx_target_as_key)
475  , group_col_widths_(group_col_widths)
476  , group_col_compact_width_(group_col_compact_width)
477  , target_groupby_indices_(target_groupby_indices)
478  , entry_count_(entry_count)
479  , min_val_(col_range_info.min)
480  , max_val_(col_range_info.max)
481  , bucket_(col_range_info.bucket)
482  , has_nulls_(col_range_info.has_nulls)
483  , count_distinct_descriptors_(count_distinct_descriptors)
484  , output_columnar_(false)
485  , render_output_(render_output)
486  , must_use_baseline_sort_(must_use_baseline_sort)
487  , is_table_function_(false)
489  , force_4byte_float_(false)
490  , col_slot_context_(col_slot_context) {
494 
495  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
496  if (sort_on_gpu_) {
497  CHECK(!ra_exe_unit.use_bump_allocator);
498  output_columnar_ = true;
499  } else {
500  switch (query_desc_type_) {
502  output_columnar_ = output_columnar_hint;
503  break;
505  output_columnar_ = output_columnar_hint &&
508  !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);
509  break;
511  output_columnar_ = output_columnar_hint;
512  break;
514  output_columnar_ = output_columnar_hint &&
517  !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);
518  break;
519  default:
520  output_columnar_ = false;
521  break;
522  }
523  }
524 
526  // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
527  // sizes
528  CHECK(!ra_exe_unit.use_bump_allocator);
531  }
532 
533 #ifdef HAVE_CUDA
534  // Check Streaming Top N heap usage, bail if > max slab size, CUDA ONLY
535  if (use_streaming_top_n_ && executor->getDataMgr()->gpusPresent()) {
536  const auto thread_count = executor->blockSize() * executor->gridSize();
537  const auto total_buff_size =
539  if (total_buff_size > executor_->maxGpuSlabSize()) {
540  throw StreamingTopNOOM(total_buff_size);
541  }
542  }
543 #endif
544 }
std::vector< Analyzer::Expr * > target_exprs
bool isLogicalSizedColumnsAllowed() const
QueryDescriptionType hash_type_
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
std::vector< int64_t > target_groupby_indices_
CountDistinctDescriptors count_distinct_descriptors_
void validate() const
QueryDescriptionType query_desc_type_
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
void setAllSlotsPaddedSizeToLogicalSize()
#define CHECK(condition)
Definition: Logger.h:289
std::vector< int8_t > group_col_widths_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
void setAllUnsetSlotsPaddedSize(const int8_t padded_size)

+ Here is the call graph for this function:

QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const size_t  entry_count,
const QueryDescriptionType  query_desc_type,
const bool  is_table_function 
)

Definition at line 567 of file QueryMemoryDescriptor.cpp.

571  : executor_(executor)
572  , allow_multifrag_(false)
573  , query_desc_type_(query_desc_type)
574  , keyless_hash_(false)
575  , interleaved_bins_on_gpu_(false)
576  , idx_target_as_key_(0)
578  , entry_count_(entry_count)
579  , min_val_(0)
580  , max_val_(0)
581  , bucket_(0)
582  , has_nulls_(false)
583  , sort_on_gpu_(false)
584  , output_columnar_(false)
585  , render_output_(false)
586  , must_use_baseline_sort_(false)
587  , is_table_function_(is_table_function)
588  , use_streaming_top_n_(false)
589  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_
QueryMemoryDescriptor::QueryMemoryDescriptor ( const QueryDescriptionType  query_desc_type,
const int64_t  min_val,
const int64_t  max_val,
const bool  has_nulls,
const std::vector< int8_t > &  group_col_widths 
)

Definition at line 591 of file QueryMemoryDescriptor.cpp.

596  : executor_(nullptr)
597  , allow_multifrag_(false)
598  , query_desc_type_(query_desc_type)
599  , keyless_hash_(false)
600  , interleaved_bins_on_gpu_(false)
601  , idx_target_as_key_(0)
602  , group_col_widths_(group_col_widths)
604  , entry_count_(0)
605  , min_val_(min_val)
606  , max_val_(max_val)
607  , bucket_(0)
608  , has_nulls_(false)
609  , sort_on_gpu_(false)
610  , output_columnar_(false)
611  , render_output_(false)
612  , must_use_baseline_sort_(false)
613  , is_table_function_(false)
614  , use_streaming_top_n_(false)
615  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
QueryMemoryDescriptor::QueryMemoryDescriptor ( const TResultSetBufferDescriptor &  thrift_query_memory_descriptor)

Member Function Documentation

void QueryMemoryDescriptor::addColSlotInfo ( const std::vector< std::tuple< int8_t, int8_t >> &  slots_for_col)

Definition at line 1200 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumn(), and col_slot_context_.

Referenced by TableFunctionManager::allocate_output_buffers(), ResultSetLogicalValuesBuilder::create(), and TableFunctionExecutionContext::launchGpuCode().

1201  {
1202  col_slot_context_.addColumn(slots_for_col);
1203 }
void addColumn(const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::addColSlotInfoFlatBuffer ( const int64_t  flatbuffer_size)

Definition at line 1205 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumnFlatBuffer(), and col_slot_context_.

Referenced by TableFunctionManager::allocate_output_buffers().

1205  {
1206  col_slot_context_.addColumnFlatBuffer(flatbuffer_size);
1207 }
void addColumnFlatBuffer(const int64_t flatbuffer_size)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::alignPaddedSlots ( )

Definition at line 1213 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::alignPaddedSlots(), col_slot_context_, and sortOnGpu().

1213  {
1215 }
void alignPaddedSlots(const bool sort_on_gpu)

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::blocksShareMemory ( ) const

Definition at line 1120 of file QueryMemoryDescriptor.cpp.

References bucket_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, g_cluster, getGroupbyColCount(), GroupByBaselineHash, GroupByPerfectHash, many_entries(), max_val_, min_val_, Projection, query_desc_type_, render_output_, and TableFunction.

Referenced by canOutputColumnar(), ResultSetReductionJIT::codegen(), QueryMemoryInitializer::computeNumberOfBuffers(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), and toString().

1120  {
1121  if (g_cluster) {
1122  return true;
1123  }
1125  return true;
1126  }
1127  if (executor_->isCPUOnly() || render_output_ ||
1132  getGroupbyColCount() > 1)) {
1133  return true;
1134  }
1137 }
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
size_t getGroupbyColCount() const
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
bool g_cluster
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::canOutputColumnar ( ) const

Definition at line 1217 of file QueryMemoryDescriptor.cpp.

References blocksShareMemory(), count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, interleavedBins(), threadsShareMemory(), and usesGetGroupValueFast().

Referenced by QueryMemoryDescriptor().

1217  {
1221 }
CountDistinctDescriptors count_distinct_descriptors_
bool interleavedBins(const ExecutorDeviceType) const
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::clearGroupColWidths ( )
inline

Definition at line 193 of file QueryMemoryDescriptor.h.

References group_col_widths_.

193 { group_col_widths_.clear(); }
std::vector< int8_t > group_col_widths_
void QueryMemoryDescriptor::clearSlotInfo ( )

Definition at line 1209 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::clear(), and col_slot_context_.

1209  {
1211 }

+ Here is the call graph for this function:

void QueryMemoryDescriptor::clearTargetGroupbyIndices ( )
inline

Definition at line 250 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

250 { target_groupby_indices_.clear(); }
std::vector< int64_t > target_groupby_indices_
static bool QueryMemoryDescriptor::countDescriptorsLogicallyEmpty ( const CountDistinctDescriptors &  count_distinct_descriptors)
inline static

Definition at line 148 of file QueryMemoryDescriptor.h.

References Invalid.

Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), lazyInitGroups(), and QueryMemoryDescriptor().

149  {
150  return std::all_of(count_distinct_descriptors.begin(),
151  count_distinct_descriptors.end(),
152  [](const CountDistinctDescriptor& desc) {
153  return desc.impl_type_ == CountDistinctImplType::Invalid;
154  });
155  }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty ( ) const
inline

Definition at line 157 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_, and countDescriptorsLogicallyEmpty().

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem().

157  {
159  }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::didOutputColumnar ( ) const
inline

Definition at line 272 of file QueryMemoryDescriptor.h.

References output_columnar_.

Referenced by ResultSetStorage::binSearchRowCount(), TargetExprCodegen::codegen(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), GroupByAndAggregate::codegenWindowRowPointer(), copy_projection_buffer_from_gpu_columnar(), ResultSetStorage::copyKeyColWise(), ResultSet::createComparator(), ResultSet::didOutputColumnar(), ResultSet::eachCellInColumn(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), ResultSet::fixupQueryMemoryDescriptor(), get_cols_ptr(), ResultSet::getTargetValueFromBufferColwise(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), ResultSetStorage::initializeBaselineValueSlots(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSet::makeGeoTargetValue(), ResultSetStorage::moveOneEntryToBuffer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::forceFourByteFloat ( ) const
inline

Definition at line 283 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

Referenced by ResultSet::makeTargetValue().

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getBucket ( ) const
inline

Definition at line 257 of file QueryMemoryDescriptor.h.

References bucket_.

Referenced by GroupByAndAggregate::codegenGroupBy(), and GroupByAndAggregate::codegenSingleColumnPerfectHash().

257 { return bucket_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferColSlotCount ( ) const

Definition at line 1100 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and target_groupby_indices_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::advance_col_buff_to_slot(), QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().

1100  {
1101  size_t total_slot_count = col_slot_context_.getSlotCount();
1102 
1103  if (target_groupby_indices_.empty()) {
1104  return total_slot_count;
1105  }
1106  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
1108  [](const int64_t i) { return i >= 0; });
1109 }
std::vector< int64_t > target_groupby_indices_
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  thread_count,
const ExecutorDeviceType  device_type 
) const

Definition at line 1022 of file QueryMemoryDescriptor.cpp.

References entry_count_, streaming_top_n::get_heap_size(), getRowSize(), SortInfo::limit, anonymous_namespace{Utm.h}::n, SortInfo::offset, RelAlgExecutionUnit::sort_info, and use_streaming_top_n_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), and QueryMemoryInitializer::QueryMemoryInitializer().

1025  {
1026  if (use_streaming_top_n_) {
1027  const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
1028  return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
1029  }
1030  return getBufferSizeBytes(device_type, entry_count_);
1031 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
const size_t limit
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
constexpr double n
Definition: Utm.h:38
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type) const

Definition at line 1076 of file QueryMemoryDescriptor.cpp.

References entry_count_, and getBufferSizeBytes().

1077  {
1078  return getBufferSizeBytes(device_type, entry_count_);
1079 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type,
const size_t  entry_count 
) const

Returns total amount of output buffer memory for each device (CPU/GPU)

Columnar: if projection, it returns index buffer + columnar buffer (all non-lazy columns); if table function, only the columnar buffer; if group by, it returns the amount required for each group column (assumes 64-bit per group) + columnar buffer (all involved agg columns).

Row-wise: returns required memory per row multiplied by number of entries

Definition at line 1045 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_GE, executor_, getColsSize(), getRowSize(), getTotalBytesOfColumnarBuffers(), group_col_widths_, interleavedBins(), keyless_hash_, output_columnar_, Projection, query_desc_type_, and TableFunction.

1046  {
1047  if (keyless_hash_ && !output_columnar_) {
1048  CHECK_GE(group_col_widths_.size(), size_t(1));
1049  auto row_bytes = align_to_int64(getColsSize());
1050 
1051  return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
1052  row_bytes;
1053  }
1054 
1055  constexpr size_t row_index_width = sizeof(int64_t);
1056  size_t total_bytes{0};
1057  if (output_columnar_) {
1058  switch (query_desc_type_) {
1060  total_bytes = row_index_width * entry_count + getTotalBytesOfColumnarBuffers();
1061  break;
1063  total_bytes = getTotalBytesOfColumnarBuffers();
1064  break;
1065  default:
1066  total_bytes = sizeof(int64_t) * group_col_widths_.size() * entry_count +
1068  break;
1069  }
1070  } else {
1071  total_bytes = getRowSize() * entry_count;
1072  }
1073  return total_bytes;
1074 }
#define CHECK_GE(x, y)
Definition: Logger.h:302
size_t getTotalBytesOfColumnarBuffers() const
QueryDescriptionType query_desc_type_
bool interleavedBins(const ExecutorDeviceType) const
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColCount ( ) const

Definition at line 1157 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColCount().

1157  {
1158  return col_slot_context_.getColCount();
1159 }
size_t getColCount() const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytes ( const size_t  col_idx) const

Definition at line 888 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, CHECK_GT, entry_count_, getColOnlyOffInBytes(), getEffectiveKeyWidth(), getFlatBufferSize(), getPaddedSlotWidthBytes(), getPrependedGroupBufferSizeInBytes(), getWarpCount(), group_col_widths_, GroupByPerfectHash, is_table_function_, keyless_hash_, output_columnar_, and query_desc_type_.

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenOutputSlot(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), get_cols_ptr(), QueryExecutionContext::groupBufferToDeinterleavedResults(), QueryMemoryInitializer::initRowGroups(), inplace_sort_gpu(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

888  {
889  const auto warp_count = getWarpCount();
890  if (output_columnar_) {
891  CHECK_EQ(size_t(1), warp_count);
892  size_t offset{0};
893  if (!keyless_hash_) {
895  }
896  if (is_table_function_) {
897  for (size_t index = 0; index < col_idx; ++index) {
898  int8_t column_width = getPaddedSlotWidthBytes(index);
899  if (column_width > 0) {
900  offset += align_to_int64(column_width * entry_count_);
901  } else {
902  int64_t flatbuffer_size = getFlatBufferSize(index);
903  CHECK_GT(flatbuffer_size, 0);
904  offset += align_to_int64(flatbuffer_size);
905  }
906  }
907  } else {
908  for (size_t index = 0; index < col_idx; ++index) {
910  }
911  }
912  return offset;
913  }
914 
915  size_t offset{0};
916  if (keyless_hash_) {
917  // ignore, there's no group column in the output buffer
919  } else {
920  offset += group_col_widths_.size() * getEffectiveKeyWidth();
921  offset = align_to_int64(offset);
922  }
923  offset += getColOnlyOffInBytes(col_idx);
924  return offset;
925 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
size_t getEffectiveKeyWidth() const
#define CHECK_GT(x, y)
Definition: Logger.h:301
size_t getColOnlyOffInBytes(const size_t col_idx) const
size_t getPrependedGroupBufferSizeInBytes() const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType query_desc_type_
#define CHECK(condition)
Definition: Logger.h:289
std::vector< int8_t > group_col_widths_
int64_t getFlatBufferSize(const size_t slot_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytesInNextBin ( const size_t  col_idx) const

Definition at line 960 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, getPaddedSlotWidthBytes(), getRowSize(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryExecutionContext::groupBufferToDeinterleavedResults().

960  {
961  auto warp_count = getWarpCount();
962  if (output_columnar_) {
963  CHECK_EQ(size_t(1), group_col_widths_.size());
964  CHECK_EQ(size_t(1), warp_count);
965  return getPaddedSlotWidthBytes(col_idx);
966  }
967 
968  return warp_count * getRowSize();
969 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
std::vector< int8_t > group_col_widths_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOnlyOffInBytes ( const size_t  col_idx) const

Definition at line 875 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColOnlyOffInBytes().

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), getColOffInBytes(), and ResultSetStorage::reduceSingleRow().

875  {
876  return col_slot_context_.getColOnlyOffInBytes(col_idx);
877 }
size_t getColOnlyOffInBytes(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColSlotContext& QueryMemoryDescriptor::getColSlotContext ( ) const
inline

Definition at line 298 of file QueryMemoryDescriptor.h.

References col_slot_context_.

Referenced by QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().

298 { return col_slot_context_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColsSize ( ) const

Definition at line 814 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsAlignedPaddedSize().

Referenced by getBufferSizeBytes(), getRowSize(), QueryExecutionContext::launchCpuCode(), and QueryExecutionContext::prepareKernelParams().

814  {
816 }
size_t getAllSlotsAlignedPaddedSize() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getCompactByteWidth ( ) const

Definition at line 836 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getCompactByteWidth().

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), and init_agg_val_vec().

836  {
838 }
size_t getCompactByteWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const CountDistinctDescriptor& QueryMemoryDescriptor::getCountDistinctDescriptor ( const size_t  idx) const
inline
size_t QueryMemoryDescriptor::getCountDistinctDescriptorsSize ( ) const
inline
size_t QueryMemoryDescriptor::getEntryCount ( ) const
inline

Definition at line 252 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by advance_to_next_columnar_target_buff(), QueryMemoryInitializer::allocateCountDistinctGpuMem(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetStorage::binSearchRowCount(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GpuSharedMemCodeBuilder::codegenReduction(), GroupByAndAggregate::codegenWindowRowPointer(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), Executor::createKernels(), ResultSet::entryCount(), Executor::executePlanWithGroupBy(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), anonymous_namespace{ResultSetReduction.cpp}::get_matching_group_value_reduction(), ResultSetStorage::getEntryCount(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSet::getTargetValueFromBufferColwise(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initGroupByBuffer(), ResultSetStorage::initializeBaselineValueSlots(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), inplace_sort_gpu(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), ResultSetStorage::moveOneEntryToBuffer(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), and ResultSetStorage::rewriteAggregateBufferOffsets().

252 { return entry_count_; }

+ Here is the caller graph for this function:

const Executor* QueryMemoryDescriptor::getExecutor ( ) const
inline

Definition at line 166 of file QueryMemoryDescriptor.h.

References executor_.

Referenced by anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), ResultSetReductionJIT::codegen(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::getExecutor(), ResultSet::getVarlenOrderEntry(), ResultSet::makeGeoTargetValue(), and ResultSet::makeVarlenTargetValue().

166 { return executor_; }

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getFlatBufferSize ( const size_t  slot_idx) const
inline

Definition at line 226 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::getFlatBufferSize().

Referenced by getColOffInBytes().

226  {
227  return col_slot_context_.getFlatBufferSize(slot_idx);
228  }
int64_t getFlatBufferSize(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getKeyCount ( ) const
inline

Definition at line 288 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), and keyless_hash_.

Referenced by anonymous_namespace{Execute.cpp}::permute_storage_columnar().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1174 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::logical_size.

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateTDigests(), TargetExprCodegen::codegenAggregate(), and ResultSet::getTargetValueFromBufferRowwise().

1175  {
1176  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
1177 }
int8_t logical_size
const SlotSize & getSlotInfo(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getMaxVal ( ) const
inline

Definition at line 256 of file QueryMemoryDescriptor.h.

References max_val_.

Referenced by GroupByAndAggregate::codegenGroupBy().

256 { return max_val_; }

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getMinVal ( ) const
inline

Definition at line 255 of file QueryMemoryDescriptor.h.

References min_val_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

255 { return min_val_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytes ( const int8_t *  col_ptr,
const size_t  bin,
const size_t  col_idx 
) const

Definition at line 971 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, entry_count_, getPaddedSlotWidthBytes(), getSlotCount(), getWarpCount(), group_col_widths_, and output_columnar_.

973  {
975  size_t offset{0};
976  auto warp_count = getWarpCount();
977  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
978  const auto total_slot_count = getSlotCount();
979  if (col_idx + 1 == total_slot_count) {
980  if (output_columnar_) {
981  return (entry_count_ - bin) * chosen_bytes;
982  } else {
983  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
984  }
985  }
986 
987  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
988  if (output_columnar_) {
989  CHECK_EQ(size_t(1), group_col_widths_.size());
990  CHECK_EQ(size_t(1), warp_count);
991 
992  offset = align_to_int64(entry_count_ * chosen_bytes);
993 
994  offset += bin * (next_chosen_bytes - chosen_bytes);
995  return offset;
996  }
997 
998  if (next_chosen_bytes == sizeof(int64_t)) {
999  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1000  } else {
1001  return chosen_bytes;
1002  }
1003 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define CHECK(condition)
Definition: Logger.h:289
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytesRowOnly ( const int8_t *  col_ptr,
const size_t  col_idx 
) const

Definition at line 1005 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), getPaddedSlotWidthBytes(), and getSlotCount().

Referenced by QueryMemoryInitializer::initColumnsPerRow().

1006  {
1007  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
1008  const auto total_slot_count = getSlotCount();
1009  if (col_idx + 1 == total_slot_count) {
1010  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1011  }
1012 
1013  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
1014 
1015  if (next_chosen_bytes == sizeof(int64_t)) {
1016  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1017  } else {
1018  return chosen_bytes;
1019  }
1020 }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPaddedColWidthForRange ( const size_t  offset,
const size_t  range 
) const
inline

Definition at line 209 of file QueryMemoryDescriptor.h.

References getPaddedSlotWidthBytes().

Referenced by result_set::get_byteoff_of_slot(), and ResultSet::makeGeoTargetValue().

209  {
210  size_t ret = 0;
211  for (size_t i = offset; i < offset + range; i++) {
212  ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
213  }
214  return ret;
215  }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1165 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::padded_size.

Referenced by advance_target_ptr_row_wise(), advance_to_next_columnar_target_buff(), TargetExprCodegen::codegen(), anonymous_namespace{GpuSharedMemoryUtils.cpp}::codegen_smem_dest_slot_ptr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenOutputSlot(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), ResultSet::eachCellInColumn(), Executor::executePlanWithoutGroupBy(), result_set::get_width_for_slot(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getNextColOffInBytesRowOnly(), getPaddedColWidthForRange(), ResultSet::getPaddedSlotWidthBytes(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnsPerRow(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSet::makeGeoTargetValue(), TargetExprCodegenBuilder::operator()(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneAggregateSlot(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

1165  {
1166  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
1167 }
const SlotSize & getSlotInfo(const size_t slot_idx) const
int8_t padded_size

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes ( ) const

Definition at line 949 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by getColOffInBytes().

949  {
951  size_t buffer_size{0};
952  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
953  buffer_size += align_to_int64(
954  std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
955  getEntryCount());
956  }
957  return buffer_size;
958 }
int8_t groupColWidth(const size_t key_idx) const
size_t getGroupbyColCount() const
#define CHECK(condition)
Definition: Logger.h:289
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes ( const size_t  group_idx) const

Definition at line 931 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by ResultSetStorage::copyKeyColWise(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

932  {
934  CHECK(group_idx < getGroupbyColCount());
935  size_t offset{0};
936  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
937  // TODO(Saman): relax that int64_bit part immediately
938  offset += align_to_int64(
939  std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
940  getEntryCount());
941  }
942  return offset;
943 }
int8_t groupColWidth(const size_t key_idx) const
size_t getGroupbyColCount() const
#define CHECK(condition)
Definition: Logger.h:289
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

QueryDescriptionType QueryMemoryDescriptor::getQueryDescriptionType ( ) const
inline

Definition at line 168 of file QueryMemoryDescriptor.h.

References query_desc_type_.

Referenced by ResultSetStorage::binSearchRowCount(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GpuReductionHelperJIT::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), Executor::collectAllDeviceResults(), copy_projection_buffer_from_gpu_columnar(), Executor::createKernels(), ResultSet::getQueryDescriptionType(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), isSingleColumnGroupByWithPerfectHash(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), TargetExprCodegenBuilder::operator()(), ResultSetStorage::reduce(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx(), ExecutionKernel::run(), ExecutionKernel::runImpl(), target_exprs_to_infos(), and ResultSet::updateStorageEntryCount().

168 { return query_desc_type_; }
QueryDescriptionType query_desc_type_

+ Here is the caller graph for this function:

std::unique_ptr< QueryExecutionContext > QueryMemoryDescriptor::getQueryExecutionContext ( const RelAlgExecutionUnit ra_exe_unit,
const Executor executor,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const int  device_id,
const int  outer_table_id,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const bool  output_columnar,
const bool  sort_on_gpu,
const size_t  thread_idx,
RenderInfo render_info 
) const

Definition at line 681 of file QueryMemoryDescriptor.cpp.

References DEBUG_TIMER, and QueryExecutionContext.

Referenced by ExecutionKernel::runImpl().

695  {
696  auto timer = DEBUG_TIMER(__func__);
697  if (frag_offsets.empty()) {
698  return nullptr;
699  }
700  return std::unique_ptr<QueryExecutionContext>(
701  new QueryExecutionContext(ra_exe_unit,
702  *this,
703  executor,
704  device_type,
705  dispatch_mode,
706  device_id,
707  outer_table_id,
708  num_rows,
709  col_buffers,
710  frag_offsets,
711  row_set_mem_owner,
712  output_columnar,
713  sort_on_gpu,
714  thread_idx,
715  render_info));
716 }
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
#define DEBUG_TIMER(name)
Definition: Logger.h:407

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowSize ( ) const

Definition at line 818 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getColsSize(), getEffectiveKeyWidth(), group_col_widths_, GroupByPerfectHash, keyless_hash_, output_columnar_, and query_desc_type_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetLogicalValuesBuilder::build(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), getColOffInBytesInNextBin(), QueryMemoryInitializer::initRowGroups(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), and ResultSetStorage::reduceSingleRow().

818  {
820  size_t total_bytes{0};
821  if (keyless_hash_) {
822  // ignore, there's no group column in the output buffer
824  } else {
825  total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
826  total_bytes = align_to_int64(total_bytes);
827  }
828  total_bytes += getColsSize();
829  return align_to_int64(total_bytes);
830 }
size_t getEffectiveKeyWidth() const
QueryDescriptionType query_desc_type_
#define CHECK(condition)
Definition: Logger.h:289
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowWidth ( ) const

Definition at line 1190 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsPaddedSize().

Referenced by get_row_bytes().

1190  {
1191  // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
1193 }
size_t getAllSlotsPaddedSize() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol ( const size_t  col_idx) const

Definition at line 1179 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, col_slot_context_, and ColSlotContext::getSlotsForCol().

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateModes(), and QueryMemoryInitializer::allocateTDigests().

1180  {
1181  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
1182  CHECK_EQ(col_slots.size(), size_t(1));
1183  return col_slots.front();
1184 }
#define CHECK_EQ(x, y)
Definition: Logger.h:297
const std::vector< size_t > & getSlotsForCol(const size_t col_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getTargetGroupbyIndex ( const size_t  target_idx) const
inline

Definition at line 234 of file QueryMemoryDescriptor.h.

References CHECK_LT, and target_groupby_indices_.

Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().

234  {
235  CHECK_LT(target_idx, target_groupby_indices_.size());
236  return target_groupby_indices_[target_idx];
237  }
std::vector< int64_t > target_groupby_indices_
#define CHECK_LT(x, y)
Definition: Logger.h:299

+ Here is the caller graph for this function:

int32_t QueryMemoryDescriptor::getTargetIdxForKey ( ) const
inline

Definition at line 181 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

Referenced by ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSetStorage::reduceSingleRow(), and reductionKey().

181 { return idx_target_as_key_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( ) const
private

Returns the maximum total number of bytes (including any required padding) needed to store the results of all non-lazy columns in the columnar case.

Definition at line 845 of file QueryMemoryDescriptor.cpp.

References CHECK, col_slot_context_, entry_count_, ColSlotContext::getTotalBytesOfColumnarBuffers(), and output_columnar_.

Referenced by getBufferSizeBytes(), and getTotalBytesOfColumnarProjections().

845  {
848 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const
#define CHECK(condition)
Definition: Logger.h:289

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( const size_t  num_entries_per_column) const
private

Helper function that returns the total number of bytes (including any required padding) needed to store the results of all non-lazy columns in the columnar case, for the given number of entries per column.

Definition at line 854 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getTotalBytesOfColumnarBuffers().

855  {
856  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
857 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections ( const size_t  projection_count) const
private

Returns the effective total number of bytes required by columnar projections, which includes (1) the total number of bytes used to store all non-lazy columns and (2) the total number of bytes used to store row indices (for lazy fetches, etc.).

NOTE: This function does not return the size of the buffers dedicated to the results; it returns the memory required to copy all valid results into a new, compact buffer (one with no holes in it).

Definition at line 868 of file QueryMemoryDescriptor.cpp.

References getTotalBytesOfColumnarBuffers().

869  {
870  constexpr size_t row_index_width = sizeof(int64_t);
871  return getTotalBytesOfColumnarBuffers(projection_count) +
872  row_index_width * projection_count;
873 }
size_t getTotalBytesOfColumnarBuffers() const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getWarpCount ( ) const

Definition at line 832 of file QueryMemoryDescriptor.cpp.

References executor_, and interleaved_bins_on_gpu_.

Referenced by getColOffInBytes(), getColOffInBytesInNextBin(), and getNextColOffInBytes().

832  {
833  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
834 }

+ Here is the caller graph for this function:

int8_t QueryMemoryDescriptor::groupColWidth ( const size_t  key_idx) const
inline

Definition at line 184 of file QueryMemoryDescriptor.h.

References CHECK_LT, and group_col_widths_.

Referenced by ResultSetStorage::copyKeyColWise(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

184  {
185  CHECK_LT(key_idx, group_col_widths_.size());
186  return group_col_widths_[key_idx];
187  }
#define CHECK_LT(x, y)
Definition: Logger.h:299
std::vector< int8_t > group_col_widths_

+ Here is the caller graph for this function:

const auto QueryMemoryDescriptor::groupColWidthsBegin ( ) const
inline

Definition at line 191 of file QueryMemoryDescriptor.h.

References group_col_widths_.

191 { return group_col_widths_.begin(); }
std::vector< int8_t > group_col_widths_
const auto QueryMemoryDescriptor::groupColWidthsEnd ( ) const
inline

Definition at line 192 of file QueryMemoryDescriptor.h.

References group_col_widths_.

192 { return group_col_widths_.end(); }
std::vector< int8_t > group_col_widths_
bool QueryMemoryDescriptor::hasInterleavedBinsOnGpu ( ) const
inline

Definition at line 178 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

bool QueryMemoryDescriptor::hasNulls ( ) const
inline

Definition at line 259 of file QueryMemoryDescriptor.h.

References has_nulls_.

Referenced by GroupByAndAggregate::codegenGroupBy().

259 { return has_nulls_; }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::hasVarlenOutput ( ) const
inline

Definition at line 337 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::hasVarlenOutput().

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), GroupByAndAggregate::codegenVarlenOutputBuffer(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::getRowSet(), query_group_by_template_impl(), and QueryMemoryInitializer::QueryMemoryInitializer().

bool hasVarlenOutput() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptor::init ( const Executor executor,
const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const ColRangeInfo col_range_info,
const KeylessInfo keyless_info,
const bool  allow_multifrag,
const ExecutorDeviceType  device_type,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
const size_t  shard_count,
const size_t  max_groups_buffer_entry_count,
RenderInfo render_info,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint,
const bool  streaming_top_n_hint 
)
static

Definition at line 241 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

257  {
258  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs);
259  const bool is_group_by{!group_col_widths.empty()};
260 
261  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
262 
263  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
264  ra_exe_unit, query_infos, crt_min_byte_width);
265 
266  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
267  col_slot_context.validate();
268 
269  if (!is_group_by) {
270  CHECK(!must_use_baseline_sort);
271 
272  return std::make_unique<QueryMemoryDescriptor>(
273  executor,
274  ra_exe_unit,
275  query_infos,
276  allow_multifrag,
277  false,
278  false,
279  -1,
280  ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
282  0,
283  0,
284  0,
285  false},
286  col_slot_context,
287  std::vector<int8_t>{},
288  /*group_col_compact_width=*/0,
289  std::vector<int64_t>{},
290  /*entry_count=*/1,
291  count_distinct_descriptors,
292  false,
293  output_columnar_hint,
294  render_info && render_info->isInSitu(),
295  must_use_baseline_sort,
296  /*use_streaming_top_n=*/false);
297  }
298 
299  size_t entry_count = 1;
300  auto actual_col_range_info = col_range_info;
301  bool interleaved_bins_on_gpu = false;
302  bool keyless_hash = false;
303  bool streaming_top_n = false;
304  int8_t group_col_compact_width = 0;
305  int32_t idx_target_as_key = -1;
306  auto output_columnar = output_columnar_hint;
307  std::vector<int64_t> target_groupby_indices;
308 
309  switch (col_range_info.hash_type_) {
311  if (render_info) {
312  // TODO(croot): this can be removed now thanks to the more centralized
313  // NonInsituQueryClassifier code, but keeping it just in case
314  render_info->setNonInSitu();
315  }
316  // keyless hash: whether or not group columns are stored at the beginning of the
317  // output buffer
318  keyless_hash =
319  (!sort_on_gpu_hint ||
321  col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
322  !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
323 
324  // if keyless, then this target index indicates whether an entry is empty or not
325  // (acts as a key)
326  idx_target_as_key = keyless_info.target_index;
327 
328  if (group_col_widths.size() > 1) {
329  // col range info max contains the expected cardinality of the output
330  entry_count = static_cast<size_t>(actual_col_range_info.max);
331  actual_col_range_info.bucket = 0;
332  } else {
333  // single column perfect hash
334  entry_count = std::max(
335  GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
336  const size_t interleaved_max_threshold{512};
337 
338  if (must_use_baseline_sort) {
339  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
340  ra_exe_unit.target_exprs);
341  col_slot_context =
342  ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
343  }
344 
345  bool has_varlen_sample_agg = false;
346  for (const auto& target_expr : ra_exe_unit.target_exprs) {
347  if (target_expr->get_contains_agg()) {
348  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
349  CHECK(agg_expr);
350  if (agg_expr->get_aggtype() == kSAMPLE &&
351  agg_expr->get_type_info().is_varlen()) {
352  has_varlen_sample_agg = true;
353  break;
354  }
355  }
356  }
357 
358  interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
359  (entry_count <= interleaved_max_threshold) &&
360  (device_type == ExecutorDeviceType::GPU) &&
362  count_distinct_descriptors) &&
363  !output_columnar;
364  }
365  break;
366  }
368  if (render_info) {
369  // TODO(croot): this can be removed now thanks to the more centralized
370  // NonInsituQueryClassifier code, but keeping it just in case
371  render_info->setNonInSitu();
372  }
373  entry_count = shard_count
374  ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
375  : max_groups_buffer_entry_count;
376  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
377  ra_exe_unit.target_exprs);
378  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
379 
380  group_col_compact_width =
381  output_columnar ? 8
382  : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
383 
384  actual_col_range_info =
386  break;
387  }
389  CHECK(!must_use_baseline_sort);
390 
391  if (streaming_top_n_hint && use_streaming_top_n(ra_exe_unit, output_columnar)) {
392  streaming_top_n = true;
393  entry_count = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
394  } else {
395  if (ra_exe_unit.use_bump_allocator) {
396  output_columnar = false;
397  entry_count = 0;
398  } else {
399  entry_count = ra_exe_unit.scan_limit
400  ? static_cast<size_t>(ra_exe_unit.scan_limit)
401  : max_groups_buffer_entry_count;
402  }
403  }
404 
405  const auto catalog = executor->getCatalog();
406  CHECK(catalog);
407  target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
408  ? target_expr_proj_indices(ra_exe_unit, *catalog)
409  : std::vector<int64_t>{};
410 
411  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
412  break;
413  }
414  default:
415  UNREACHABLE() << "Unknown query type";
416  }
417 
418  return std::make_unique<QueryMemoryDescriptor>(executor,
419  ra_exe_unit,
420  query_infos,
421  allow_multifrag,
422  keyless_hash,
423  interleaved_bins_on_gpu,
424  idx_target_as_key,
425  actual_col_range_info,
426  col_slot_context,
427  group_col_widths,
428  group_col_compact_width,
429  target_groupby_indices,
430  entry_count,
431  count_distinct_descriptors,
432  sort_on_gpu_hint,
433  output_columnar,
434  render_info && render_info->isInSitu(),
435  must_use_baseline_sort,
436  streaming_top_n);
437 }
std::vector< Analyzer::Expr * > target_exprs
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
std::vector< int64_t > target_expr_proj_indices(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &cat)
const bool keyless
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
QueryDescriptionType hash_type_
#define UNREACHABLE()
Definition: Logger.h:333
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
static int8_t pick_target_compact_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
const int32_t target_index
std::vector< int64_t > target_expr_group_by_indices(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs, const std::vector< Analyzer::Expr * > &target_exprs)
void setNonInSitu()
Definition: RenderInfo.cpp:49
#define CHECK(condition)
Definition: Logger.h:289
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
int8_t pick_baseline_key_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::interleavedBins ( const ExecutorDeviceType  device_type) const
bool QueryMemoryDescriptor::isGroupBy ( ) const
inline

Definition at line 195 of file QueryMemoryDescriptor.h.

References group_col_widths_.

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), init_agg_val_vec(), QueryMemoryInitializer::initColumnsPerRow(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

195 { return !group_col_widths_.empty(); }
std::vector< int8_t > group_col_widths_

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed ( ) const

Definition at line 1092 of file QueryMemoryDescriptor.cpp.

References g_cluster, output_columnar_, Projection, query_desc_type_, and TableFunction.

Referenced by TargetExprCodegen::codegenAggregate(), TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(), TargetExprCodegenBuilder::codegenSlotEmptyKey(), init_agg_val_vec(), ResultSet::makeTargetValue(), QueryMemoryDescriptor(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceOneSlotSingleValue(), and setOutputColumnar().

1092  {
1093  // In distributed mode, result sets are serialized using rowwise iterators, so we use
1094  // consistent slot widths for now
1095  return output_columnar_ && !g_cluster &&
1096  (query_desc_type_ == QueryDescriptionType::Projection ||
1097  query_desc_type_ == QueryDescriptionType::TableFunction);
1098 }
QueryDescriptionType query_desc_type_
bool g_cluster

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash ( ) const
inline

Definition at line 170 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), getQueryDescriptionType(), and GroupByPerfectHash.

Referenced by GroupByAndAggregate::codegenGroupBy(), and ResultSet::getTargetValueFromBufferRowwise().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isWarpSyncRequired ( const ExecutorDeviceType  device_type) const

Definition at line 1149 of file QueryMemoryDescriptor.cpp.

References executor_, and GPU.

Referenced by query_group_by_template_impl().

1150  {
1151  if (device_type == ExecutorDeviceType::GPU) {
1152  return executor_->cudaMgr()->isArchVoltaOrGreaterForAll();
1153  }
1154  return false;
1155 }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::lazyInitGroups ( const ExecutorDeviceType  device_type) const

Definition at line 1139 of file QueryMemoryDescriptor.cpp.

References count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, and render_output_.

Referenced by create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().

1139  {
1140  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
1141  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1142 }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static bool QueryMemoryDescriptor::many_entries ( const int64_t  max_val,
const int64_t  min_val,
const int64_t  bucket 
)
inline static

Definition at line 142 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory().

144  {
145  return max_val - min_val > 10000 * std::max(bucket, int64_t(1));
146  }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::mustUseBaselineSort ( ) const
inline

Definition at line 279 of file QueryMemoryDescriptor.h.

References must_use_baseline_sort_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::operator== ( const QueryMemoryDescriptor & other) const

Definition at line 617 of file QueryMemoryDescriptor.cpp.

References bucket_, col_slot_context_, count_distinct_descriptors_, force_4byte_float_, group_col_compact_width_, group_col_widths_, has_nulls_, idx_target_as_key_, interleaved_bins_on_gpu_, keyless_hash_, max_val_, min_val_, output_columnar_, query_desc_type_, sort_on_gpu_, and target_groupby_indices_.

617  {
618  // Note that this method does not check ptr reference members (e.g. executor_) or
619  // entry_count_
620  if (query_desc_type_ != other.query_desc_type_) {
621  return false;
622  }
623  if (keyless_hash_ != other.keyless_hash_) {
624  return false;
625  }
626  if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
627  return false;
628  }
629  if (idx_target_as_key_ != other.idx_target_as_key_) {
630  return false;
631  }
632  if (force_4byte_float_ != other.force_4byte_float_) {
633  return false;
634  }
635  if (group_col_widths_ != other.group_col_widths_) {
636  return false;
637  }
638  if (group_col_compact_width_ != other.group_col_compact_width_) {
639  return false;
640  }
641  if (target_groupby_indices_ != other.target_groupby_indices_) {
642  return false;
643  }
644  if (min_val_ != other.min_val_) {
645  return false;
646  }
647  if (max_val_ != other.max_val_) {
648  return false;
649  }
650  if (bucket_ != other.bucket_) {
651  return false;
652  }
653  if (has_nulls_ != other.has_nulls_) {
654  return false;
655  }
656  if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
657  return false;
658  } else {
659  // Count distinct descriptors can legitimately differ in device only.
660  for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
661  auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
662  auto count_distinct_desc = count_distinct_descriptors_[i];
663  count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
664  if (ref_count_distinct_desc != count_distinct_desc) {
665  return false;
666  }
667  }
668  }
669  if (sort_on_gpu_ != other.sort_on_gpu_) {
670  return false;
671  }
672  if (output_columnar_ != other.output_columnar_) {
673  return false;
674  }
675  if (col_slot_context_ != other.col_slot_context_) {
676  return false;
677  }
678  return true;
679 }
std::vector< int64_t > target_groupby_indices_
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
int8_t QueryMemoryDescriptor::pick_target_compact_width ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const int8_t  crt_min_byte_width 
)
static

Definition at line 718 of file QueryMemoryDescriptor.cpp.

References CHECK, CHECK_EQ, g_bigint_count, anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), Analyzer::UOper::get_operand(), Analyzer::Expr::get_type_info(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, anonymous_namespace{QueryMemoryDescriptor.cpp}::is_int_and_no_bigger_than(), kCOUNT, kENCODING_DICT, kUNNEST, and RelAlgExecutionUnit::target_exprs.

721  {
722  if (g_bigint_count) {
723  return sizeof(int64_t);
724  }
725  int8_t compact_width{0};
726  auto col_it = ra_exe_unit.input_col_descs.begin();
727  auto const end = ra_exe_unit.input_col_descs.end();
728  int unnest_array_col_id{std::numeric_limits<int>::min()};
729  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
730  const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
731  if (uoper && uoper->get_optype() == kUNNEST) {
732  const auto& arg_ti = uoper->get_operand()->get_type_info();
733  CHECK(arg_ti.is_array());
734  const auto& elem_ti = arg_ti.get_elem_type();
735  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
736  unnest_array_col_id = (*col_it)->getColId();
737  } else {
738  compact_width = crt_min_byte_width;
739  break;
740  }
741  }
742  if (col_it != end) {
743  ++col_it;
744  }
745  }
746  if (!compact_width &&
747  (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
748  compact_width = crt_min_byte_width;
749  }
750  if (!compact_width) {
751  col_it = ra_exe_unit.input_col_descs.begin();
752  std::advance(col_it, ra_exe_unit.groupby_exprs.size());
753  for (const auto target : ra_exe_unit.target_exprs) {
754  const auto& ti = target->get_type_info();
755  const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
756  if (agg && agg->get_arg()) {
757  compact_width = crt_min_byte_width;
758  break;
759  }
760 
761  if (agg) {
762  CHECK_EQ(kCOUNT, agg->get_aggtype());
763  CHECK(!agg->get_is_distinct());
764  if (col_it != end) {
765  ++col_it;
766  }
767  continue;
768  }
769 
770  if (is_int_and_no_bigger_than(ti, 4) ||
771  (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
772  if (col_it != end) {
773  ++col_it;
774  }
775  continue;
776  }
777 
778  const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
779  if (uoper && uoper->get_optype() == kUNNEST &&
780  (*col_it)->getColId() == unnest_array_col_id) {
781  const auto arg_ti = uoper->get_operand()->get_type_info();
782  CHECK(arg_ti.is_array());
783  const auto& elem_ti = arg_ti.get_elem_type();
784  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
785  if (col_it != end) {
786  ++col_it;
787  }
788  continue;
789  }
790  }
791 
792  compact_width = crt_min_byte_width;
793  break;
794  }
795  }
796  if (!compact_width) {
797  size_t total_tuples{0};
798  for (const auto& qi : query_infos) {
799  total_tuples += qi.info.getNumTuples();
800  }
801  return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
802  unnest_array_col_id != std::numeric_limits<int>::min()
803  ? 4
804  : crt_min_byte_width;
805  } else {
806  // TODO(miyu): relax this condition to allow more cases just w/o padding
807  for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs)) {
808  compact_width = std::max(compact_width, wid);
809  }
810  return compact_width;
811  }
812 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:297
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_bigint_count
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:83
const Expr * get_operand() const
Definition: Analyzer.h:380
Definition: sqldefs.h:78
bool is_int_and_no_bigger_than(const SQLTypeInfo &ti, const size_t byte_width)
#define CHECK(condition)
Definition: Logger.h:289
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs

+ Here is the call graph for this function:

std::string QueryMemoryDescriptor::queryDescTypeToString ( ) const

Definition at line 1223 of file QueryMemoryDescriptor.cpp.

References Estimator, GroupByBaselineHash, GroupByPerfectHash, NonGroupedAggregate, Projection, query_desc_type_, TableFunction, and UNREACHABLE.

Referenced by reductionKey().

1223  {
1224  switch (query_desc_type_) {
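1225  case QueryDescriptionType::GroupByPerfectHash:
1226  return "Perfect Hash";
1227  case QueryDescriptionType::GroupByBaselineHash:
1228  return "Baseline Hash";
1229  case QueryDescriptionType::Projection:
1230  return "Projection";
1231  case QueryDescriptionType::TableFunction:
1232  return "Table Function";
1233  case QueryDescriptionType::NonGroupedAggregate:
1234  return "Non-grouped Aggregate";
1235  case QueryDescriptionType::Estimator:
1236  return "Estimator";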
1237  default:
1238  UNREACHABLE();
1239  }
1240  return "";
1241 }
#define UNREACHABLE()
Definition: Logger.h:333
QueryDescriptionType query_desc_type_

+ Here is the caller graph for this function:

std::string QueryMemoryDescriptor::reductionKey ( ) const

Definition at line 1266 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, getEffectiveKeyWidth(), getGroupbyColCount(), getTargetGroupbyIndex(), getTargetIdxForKey(), join(), keyless_hash_, queryDescTypeToString(), targetGroupbyIndicesSize(), to_string(), ColSlotContext::toString(), and toString().

Referenced by ResultSetReductionJIT::cacheKey(), and toString().

1266  {
1267  std::string str;
1268  str += "Query Memory Descriptor State\n";
1269  str += "\tQuery Type: " + queryDescTypeToString() + "\n";
1270  str +=
1271  "\tKeyless Hash: " + ::toString(keyless_hash_) +
1272  (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
1273  : "") +
1274  "\n";
1275  str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
1276  str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
1277  const auto group_indices_size = targetGroupbyIndicesSize();
1278  if (group_indices_size) {
1279  std::vector<std::string> group_indices_strings;
1280  for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
1281  group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
1282  }
1283  str += "\tTarget group by indices: " +
1284  boost::algorithm::join(group_indices_strings, ",") + "\n";
1285  }
1286  str += "\t" + col_slot_context_.toString();
1287  return str;
1288 }
int64_t getTargetGroupbyIndex(const size_t target_idx) const
std::string toString() const
std::string join(T const &container, std::string const &delim)
size_t getEffectiveKeyWidth() const
std::string to_string(char const *&&v)
size_t getGroupbyColCount() const
size_t targetGroupbyIndicesSize() const
std::string toString() const
std::string queryDescTypeToString() const
int32_t getTargetIdxForKey() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::resetGroupColWidths ( const std::vector< int8_t > &  new_group_col_widths)
inline protected

Definition at line 352 of file QueryMemoryDescriptor.h.

References group_col_widths_.

352  {
353  group_col_widths_ = new_group_col_widths;
354  }
std::vector< int8_t > group_col_widths_
void QueryMemoryDescriptor::setAllTargetGroupbyIndices ( std::vector< int64_t >  group_by_indices)
inline

Definition at line 239 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

239  {
240  target_groupby_indices_ = group_by_indices;
241  }
std::vector< int64_t > target_groupby_indices_
void QueryMemoryDescriptor::setEntryCount ( const size_t  val)
inline

Definition at line 253 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by Executor::executePlanWithGroupBy(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::updateEntryCount(), and ResultSet::updateStorageEntryCount().

253 { entry_count_ = val; }

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setForceFourByteFloat ( const bool  val)
inline

Definition at line 284 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

void QueryMemoryDescriptor::setGroupColCompactWidth ( const int8_t  val)
inline

Definition at line 197 of file QueryMemoryDescriptor.h.

References group_col_compact_width_.

void QueryMemoryDescriptor::setHasInterleavedBinsOnGpu ( const bool  val)
inline

Definition at line 179 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

void QueryMemoryDescriptor::setHasKeylessHash ( const bool  val)
inline

Definition at line 176 of file QueryMemoryDescriptor.h.

References keyless_hash_.

void QueryMemoryDescriptor::setOutputColumnar ( const bool  val)

Definition at line 1081 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, isLogicalSizedColumnsAllowed(), output_columnar_, and ColSlotContext::setAllSlotsPaddedSizeToLogicalSize().

Referenced by TableFunctionManager::allocate_output_buffers(), Executor::executeTableFunction(), and TableFunctionExecutionContext::launchGpuCode().

1081  {
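1082  output_columnar_ = val;
1083  if (isLogicalSizedColumnsAllowed()) {
1084  col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
1085  }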
1086 }
bool isLogicalSizedColumnsAllowed() const
void setAllSlotsPaddedSizeToLogicalSize()

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setPaddedSlotWidthBytes ( const size_t  slot_idx,
const int8_t  bytes 
)

Definition at line 1169 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setPaddedSlotWidthBytes().

Referenced by TargetExprCodegenBuilder::operator()().

1170  {
1171  col_slot_context_.setPaddedSlotWidthBytes(slot_idx, bytes);
1172 }
void setPaddedSlotWidthBytes(const size_t slot_idx, const int8_t bytes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setQueryDescriptionType ( const QueryDescriptionType  val)
inline

Definition at line 169 of file QueryMemoryDescriptor.h.

References query_desc_type_.

169 { query_desc_type_ = val; }
QueryDescriptionType query_desc_type_
void QueryMemoryDescriptor::setTargetIdxForKey ( const int32_t  val)
inline

Definition at line 182 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

bool QueryMemoryDescriptor::slotIsVarlenOutput ( const size_t  slot_idx) const
inline

Definition at line 347 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::slotIsVarlen().

Referenced by advance_target_ptr_row_wise(), and ResultSet::makeGeoTargetValue().

347  {
348  return col_slot_context_.slotIsVarlen(slot_idx);
349  }
bool slotIsVarlen(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::sortOnGpu ( ) const
inline

Definition at line 269 of file QueryMemoryDescriptor.h.

References sort_on_gpu_.

Referenced by alignPaddedSlots(), QueryExecutionContext::launchGpuCode(), ExecutionKernel::runImpl(), and use_speculative_top_n().

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::targetGroupbyIndicesSize ( ) const
inline
size_t QueryMemoryDescriptor::targetGroupbyNegativeIndicesSize ( ) const
inline

Definition at line 244 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

244  {
245  return std::count_if(
246  target_groupby_indices_.begin(),
247  target_groupby_indices_.end(),
248  [](const int64_t& target_group_by_index) { return target_group_by_index < 0; });
249  }
std::vector< int64_t > target_groupby_indices_
std::string QueryMemoryDescriptor::toString ( ) const

Definition at line 1243 of file QueryMemoryDescriptor.cpp.

References allow_multifrag_, blocksShareMemory(), bucket_, entry_count_, GPU, interleaved_bins_on_gpu_, is_table_function_, lazyInitGroups(), max_val_, min_val_, must_use_baseline_sort_, output_columnar_, reductionKey(), render_output_, sort_on_gpu_, threadsShareMemory(), to_string(), use_streaming_top_n_, and usesGetGroupValueFast().

Referenced by Executor::createKernels(), and reductionKey().

1243  {
1244  auto str = reductionKey();
1245  str += "\tAllow Multifrag: " + ::toString(allow_multifrag_) + "\n";
1246  str += "\tInterleaved Bins on GPU: " + ::toString(interleaved_bins_on_gpu_) + "\n";
1247  str += "\tBlocks Share Memory: " + ::toString(blocksShareMemory()) + "\n";
1248  str += "\tThreads Share Memory: " + ::toString(threadsShareMemory()) + "\n";
1249  str += "\tUses Fast Group Values: " + ::toString(usesGetGroupValueFast()) + "\n";
1250  str +=
1251  "\tLazy Init Groups (GPU): " + ::toString(lazyInitGroups(ExecutorDeviceType::GPU)) +
1252  "\n";
1253  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
1254  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
1255  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
1256  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
1257  str += "\tSort on GPU: " + ::toString(sort_on_gpu_) + "\n";
1258  str += "\tUse Streaming Top N: " + ::toString(use_streaming_top_n_) + "\n";
1259  str += "\tOutput Columnar: " + ::toString(output_columnar_) + "\n";
1260  str += "\tRender Output: " + ::toString(render_output_) + "\n";
1261  str += "\tUse Baseline Sort: " + ::toString(must_use_baseline_sort_) + "\n";
1262  str += "\tIs Table Function: " + ::toString(is_table_function_) + "\n";
1263  return str;
1264 }
std::string toString() const
std::string to_string(char const *&&v)
bool lazyInitGroups(const ExecutorDeviceType) const
std::string reductionKey() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static TResultSetBufferDescriptor QueryMemoryDescriptor::toThrift ( const QueryMemoryDescriptor )
static
int8_t QueryMemoryDescriptor::updateActualMinByteWidth ( const int8_t  actual_min_byte_width) const

Definition at line 1195 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getMinPaddedByteSize().

1196  {
1197  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
1198 }
int8_t getMinPaddedByteSize(const int8_t actual_min_byte_width) const

+ Here is the call graph for this function:

void QueryMemoryDescriptor::useConsistentSlotWidthSize ( const int8_t  slot_width_size)

Definition at line 1186 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setAllSlotsSize().

1186  {
1187  col_slot_context_.setAllSlotsSize(slot_width_size);
1188 }
void setAllSlotsSize(const int8_t slot_width_size)

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::usesGetGroupValueFast ( ) const

Definition at line 1111 of file QueryMemoryDescriptor.cpp.

References getGroupbyColCount(), GroupByPerfectHash, and query_desc_type_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), and toString().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::useStreamingTopN ( ) const
inline
std::optional< size_t > QueryMemoryDescriptor::varlenOutputBufferElemSize ( ) const

Definition at line 1306 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().

Referenced by TargetExprCodegen::codegenAggregate(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

1306  {
1307  int64_t buffer_element_size{0};
1308  for (size_t i = 0; i < col_slot_context_.getSlotCount(); i++) {
1309  try {
1310  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1311  if (slot_element_size < 0) {
1312  return std::nullopt;
1313  }
1314  buffer_element_size += slot_element_size;
1315  } catch (...) {
1316  continue;
1317  }
1318  }
1319  return buffer_element_size;
1320 }
int64_t varlenOutputElementSize(const size_t slot_idx) const
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::varlenOutputRowSizeToSlot ( const size_t  slot_idx) const

Definition at line 1322 of file QueryMemoryDescriptor.cpp.

References CHECK_LT, col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().

Referenced by TargetExprCodegen::codegenAggregate().

1322  {
1323  int64_t buffer_element_size{0};
1324  CHECK_LT(slot_idx, col_slot_context_.getSlotCount());
1325  for (size_t i = 0; i < slot_idx; i++) {
1326  try {
1327  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1328  if (slot_element_size < 0) {
1329  continue;
1330  }
1331  buffer_element_size += slot_element_size;
1332  } catch (...) {
1333  continue;
1334  }
1335  }
1336  return buffer_element_size;
1337 }
int64_t varlenOutputElementSize(const size_t slot_idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:299
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class QueryExecutionContext
friend

Definition at line 394 of file QueryMemoryDescriptor.h.

Referenced by getQueryExecutionContext().

friend class ResultSet
friend

Definition at line 393 of file QueryMemoryDescriptor.h.

Member Data Documentation

bool QueryMemoryDescriptor::allow_multifrag_
private

Definition at line 358 of file QueryMemoryDescriptor.h.

Referenced by toString().

int64_t QueryMemoryDescriptor::bucket_
private

Definition at line 375 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getBucket(), operator==(), and toString().

size_t QueryMemoryDescriptor::entry_count_
private
const Executor* QueryMemoryDescriptor::executor_
private
bool QueryMemoryDescriptor::force_4byte_float_
private

Definition at line 385 of file QueryMemoryDescriptor.h.

Referenced by forceFourByteFloat(), operator==(), and setForceFourByteFloat().

int8_t QueryMemoryDescriptor::group_col_compact_width_
private
bool QueryMemoryDescriptor::has_nulls_
private

Definition at line 376 of file QueryMemoryDescriptor.h.

Referenced by hasNulls(), and operator==().

int32_t QueryMemoryDescriptor::idx_target_as_key_
private

Definition at line 362 of file QueryMemoryDescriptor.h.

Referenced by getTargetIdxForKey(), operator==(), and setTargetIdxForKey().

bool QueryMemoryDescriptor::interleaved_bins_on_gpu_
private
bool QueryMemoryDescriptor::is_table_function_
private

Definition at line 382 of file QueryMemoryDescriptor.h.

Referenced by getColOffInBytes(), and toString().

bool QueryMemoryDescriptor::keyless_hash_
private
int64_t QueryMemoryDescriptor::max_val_
private

Definition at line 374 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMaxVal(), operator==(), and toString().

int64_t QueryMemoryDescriptor::min_val_
private

Definition at line 372 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMinVal(), operator==(), and toString().

bool QueryMemoryDescriptor::must_use_baseline_sort_
private

Definition at line 381 of file QueryMemoryDescriptor.h.

Referenced by mustUseBaselineSort(), and toString().

bool QueryMemoryDescriptor::render_output_
private

Definition at line 380 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), lazyInitGroups(), and toString().

bool QueryMemoryDescriptor::sort_on_gpu_
private

Definition at line 378 of file QueryMemoryDescriptor.h.

Referenced by operator==(), QueryMemoryDescriptor(), sortOnGpu(), and toString().

std::vector<int64_t> QueryMemoryDescriptor::target_groupby_indices_
private
bool QueryMemoryDescriptor::use_streaming_top_n_
private

The documentation for this class was generated from the following files: