OmniSciDB  b24e664e58
QueryMemoryDescriptor Class Reference

#include <QueryMemoryDescriptor.h>


Public Member Functions

 QueryMemoryDescriptor ()
 
 QueryMemoryDescriptor (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector< int8_t > &group_col_widths, const int8_t group_col_compact_width, const std::vector< ssize_t > &target_groupby_indices, const size_t entry_count, const GroupByMemSharing sharing, const bool shared_mem_for_group_by, const CountDistinctDescriptors count_distinct_descriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort)
 
 QueryMemoryDescriptor (const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type, const bool is_table_function)
 
 QueryMemoryDescriptor (const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector< int8_t > &group_col_widths)
 
 QueryMemoryDescriptor (const TResultSetBufferDescriptor &thrift_query_memory_descriptor)
 
bool operator== (const QueryMemoryDescriptor &other) const
 
std::unique_ptr< QueryExecutionContext > getQueryExecutionContext (const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, RenderInfo *) const
 
bool countDistinctDescriptorsLogicallyEmpty () const
 
const Executor * getExecutor () const
 
QueryDescriptionType getQueryDescriptionType () const
 
void setQueryDescriptionType (const QueryDescriptionType val)
 
bool isSingleColumnGroupByWithPerfectHash () const
 
bool hasKeylessHash () const
 
void setHasKeylessHash (const bool val)
 
bool hasInterleavedBinsOnGpu () const
 
void setHasInterleavedBinsOnGpu (const bool val)
 
int32_t getTargetIdxForKey () const
 
void setTargetIdxForKey (const int32_t val)
 
int8_t groupColWidth (const size_t key_idx) const
 
size_t getPrependedGroupColOffInBytes (const size_t group_idx) const
 
size_t getPrependedGroupBufferSizeInBytes () const
 
const auto groupColWidthsBegin () const
 
const auto groupColWidthsEnd () const
 
void clearGroupColWidths ()
 
bool isGroupBy () const
 
void setGroupColCompactWidth (const int8_t val)
 
size_t getColCount () const
 
size_t getSlotCount () const
 
const int8_t getPaddedSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getLogicalSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getSlotIndexForSingleSlotCol (const size_t col_idx) const
 
size_t getPaddedColWidthForRange (const size_t offset, const size_t range) const
 
void useConsistentSlotWidthSize (const int8_t slot_width_size)
 
size_t getRowWidth () const
 
int8_t updateActualMinByteWidth (const int8_t actual_min_byte_width) const
 
void addColSlotInfo (const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)
 
void clearSlotInfo ()
 
void alignPaddedSlots ()
 
ssize_t getTargetGroupbyIndex (const size_t target_idx) const
 
void setAllTargetGroupbyIndices (std::vector< ssize_t > group_by_indices)
 
size_t targetGroupbyIndicesSize () const
 
size_t targetGroupbyNegativeIndicesSize () const
 
void clearTargetGroupbyIndices ()
 
size_t getEntryCount () const
 
void setEntryCount (const size_t val)
 
int64_t getMinVal () const
 
int64_t getMaxVal () const
 
int64_t getBucket () const
 
bool hasNulls () const
 
GroupByMemSharing getGpuMemSharing () const
 
const CountDistinctDescriptor & getCountDistinctDescriptor (const size_t idx) const
 
size_t getCountDistinctDescriptorsSize () const
 
bool sortOnGpu () const
 
bool canOutputColumnar () const
 
bool didOutputColumnar () const
 
void setOutputColumnar (const bool val)
 
bool isLogicalSizedColumnsAllowed () const
 
bool mustUseBaselineSort () const
 
bool forceFourByteFloat () const
 
void setForceFourByteFloat (const bool val)
 
size_t getGroupbyColCount () const
 
size_t getKeyCount () const
 
size_t getBufferColSlotCount () const
 
size_t getBufferSizeBytes (const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type, const size_t override_entry_count) const
 
const ColSlotContext & getColSlotContext () const
 
bool usesGetGroupValueFast () const
 
bool blocksShareMemory () const
 
bool threadsShareMemory () const
 
bool lazyInitGroups (const ExecutorDeviceType) const
 
bool interleavedBins (const ExecutorDeviceType) const
 
size_t sharedMemBytes (const ExecutorDeviceType) const
 
size_t getColOffInBytes (const size_t col_idx) const
 
size_t getColOffInBytesInNextBin (const size_t col_idx) const
 
size_t getNextColOffInBytes (const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
 
size_t getColOnlyOffInBytes (const size_t col_idx) const
 
size_t getRowSize () const
 
size_t getColsSize () const
 
size_t getWarpCount () const
 
size_t getCompactByteWidth () const
 
size_t getEffectiveKeyWidth () const
 
bool isWarpSyncRequired (const ExecutorDeviceType) const
 
std::string toString () const
 
std::string reductionKey () const
 

Static Public Member Functions

static TResultSetBufferDescriptor toThrift (const QueryMemoryDescriptor &)
 
static std::unique_ptr< QueryMemoryDescriptor > init (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
static bool many_entries (const int64_t max_val, const int64_t min_val, const int64_t bucket)
 
static bool countDescriptorsLogicallyEmpty (const CountDistinctDescriptors &count_distinct_descriptors)
 
static int8_t pick_target_compact_width (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
 

Protected Member Functions

void resetGroupColWidths (const std::vector< int8_t > &new_group_col_widths)
 

Private Member Functions

size_t getTotalBytesOfColumnarBuffers () const
 
size_t getTotalBytesOfColumnarBuffers (const size_t num_entries_per_column) const
 
size_t getTotalBytesOfColumnarProjections (const size_t projection_count) const
 

Private Attributes

const Executor * executor_
 
bool allow_multifrag_
 
QueryDescriptionType query_desc_type_
 
bool keyless_hash_
 
bool interleaved_bins_on_gpu_
 
int32_t idx_target_as_key_
 
std::vector< int8_t > group_col_widths_
 
int8_t group_col_compact_width_
 
std::vector< ssize_t > target_groupby_indices_
 
size_t entry_count_
 
int64_t min_val_
 
int64_t max_val_
 
int64_t bucket_
 
bool has_nulls_
 
GroupByMemSharing sharing_
 
CountDistinctDescriptors count_distinct_descriptors_
 
bool sort_on_gpu_
 
bool output_columnar_
 
bool render_output_
 
bool must_use_baseline_sort_
 
bool is_table_function_
 
bool force_4byte_float_
 
ColSlotContext col_slot_context_
 

Friends

class ResultSet
 
class QueryExecutionContext
 
template<typename META_CLASS_TYPE >
class AggregateReductionEgress
 

Detailed Description

Definition at line 66 of file QueryMemoryDescriptor.h.

Constructor & Destructor Documentation

QueryMemoryDescriptor::QueryMemoryDescriptor ( )

Definition at line 472 of file QueryMemoryDescriptor.cpp.

References Projection, and Shared.

473  : executor_(nullptr)
474  , allow_multifrag_(false)
475  , query_desc_type_(QueryDescriptionType::Projection)
476  , keyless_hash_(false)
477  , interleaved_bins_on_gpu_(false)
478  , idx_target_as_key_(0)
479  , group_col_compact_width_(0)
480  , entry_count_(0)
481  , min_val_(0)
482  , max_val_(0)
483  , bucket_(0)
484  , has_nulls_(false)
485  , sharing_(GroupByMemSharing::Shared)
486  , sort_on_gpu_(false)
487  , output_columnar_(false)
488  , render_output_(false)
489  , must_use_baseline_sort_(false)
490  , is_table_function_(false)
491  , force_4byte_float_(false) {}
QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const bool  allow_multifrag,
const bool  keyless_hash,
const bool  interleaved_bins_on_gpu,
const int32_t  idx_target_as_key,
const ColRangeInfo &  col_range_info,
const ColSlotContext &  col_slot_context,
const std::vector< int8_t > &  group_col_widths,
const int8_t  group_col_compact_width,
const std::vector< ssize_t > &  target_groupby_indices,
const size_t  entry_count,
const GroupByMemSharing  sharing,
const bool  shared_mem_for_group_by,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  sort_on_gpu_hint,
const bool  output_columnar,
const bool  render_output,
const bool  must_use_baseline_sort 
)

Definition at line 373 of file QueryMemoryDescriptor.cpp.

References canOutputColumnar(), CHECK(), col_slot_context_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), getRowSize(), GroupByBaselineHash, GroupByPerfectHash, interleaved_bins_on_gpu_, isLogicalSizedColumnsAllowed(), keyless_hash_, NonGroupedAggregate, output_columnar_, Projection, query_desc_type_, ColSlotContext::setAllSlotsPaddedSizeToLogicalSize(), ColSlotContext::setAllUnsetSlotsPaddedSize(), SharedForKeylessOneColumnKnownRange, sharing_, sort_on_gpu_, RelAlgExecutionUnit::use_bump_allocator, and ColSlotContext::validate().

394  : executor_(executor)
395  , allow_multifrag_(allow_multifrag)
396  , query_desc_type_(col_range_info.hash_type_)
397  , keyless_hash_(keyless_hash)
398  , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
399  , idx_target_as_key_(idx_target_as_key)
400  , group_col_widths_(group_col_widths)
401  , group_col_compact_width_(group_col_compact_width)
402  , target_groupby_indices_(target_groupby_indices)
403  , entry_count_(entry_count)
404  , min_val_(col_range_info.min)
405  , max_val_(col_range_info.max)
406  , bucket_(col_range_info.bucket)
407  , has_nulls_(col_range_info.has_nulls)
408  , sharing_(sharing)
409  , count_distinct_descriptors_(count_distinct_descriptors)
410  , output_columnar_(false)
411  , render_output_(render_output)
412  , must_use_baseline_sort_(must_use_baseline_sort)
413  , is_table_function_(false)
414  , force_4byte_float_(false)
415  , col_slot_context_(col_slot_context) {
418 
419  // TODO(Saman): should remove this after implementing shared memory path
420  // completely through codegen We should not use the current shared memory path if
421  // more than 8 bytes per group is required
423  shared_mem_for_group_by && (getRowSize() <= sizeof(int64_t))) {
424  // TODO(adb / saman): Move this into a different enum so we can remove
425  // GroupByMemSharing
427  interleaved_bins_on_gpu_ = false;
428  }
429 
430  // Note that output_columnar_ currently defaults to false to avoid issues with
431  // getRowSize above. If output columnar is enable then shared_mem_for_group_by is not,
432  // and the above condition would never be true.
433 
434  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
435 
436  if (sort_on_gpu_) {
437  CHECK(!ra_exe_unit.use_bump_allocator);
438  output_columnar_ = true;
439  } else {
440  switch (query_desc_type_) {
442  output_columnar_ = output_columnar_hint;
443  break;
448  break;
450  output_columnar_ = output_columnar_hint;
451  break;
456  break;
457  default:
458  output_columnar_ = false;
459  break;
460  }
461  }
462 
464  // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
465  // sizes
466  CHECK(!ra_exe_unit.use_bump_allocator);
469  }
470 }

QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const size_t  entry_count,
const QueryDescriptionType  query_desc_type,
const bool  is_table_function 
)

Definition at line 493 of file QueryMemoryDescriptor.cpp.

References Shared.

497  : executor_(executor)
498  , allow_multifrag_(false)
499  , query_desc_type_(query_desc_type)
500  , keyless_hash_(false)
501  , interleaved_bins_on_gpu_(false)
502  , idx_target_as_key_(0)
503  , group_col_compact_width_(0)
504  , entry_count_(entry_count)
505  , min_val_(0)
506  , max_val_(0)
507  , bucket_(0)
508  , has_nulls_(false)
509  , sharing_(GroupByMemSharing::Shared)
510  , sort_on_gpu_(false)
511  , output_columnar_(false)
512  , render_output_(false)
513  , must_use_baseline_sort_(false)
514  , is_table_function_(is_table_function)
515  , force_4byte_float_(false) {}
QueryMemoryDescriptor::QueryMemoryDescriptor ( const QueryDescriptionType  query_desc_type,
const int64_t  min_val,
const int64_t  max_val,
const bool  has_nulls,
const std::vector< int8_t > &  group_col_widths 
)

Definition at line 517 of file QueryMemoryDescriptor.cpp.

References Shared.

522  : executor_(nullptr)
523  , allow_multifrag_(false)
524  , query_desc_type_(query_desc_type)
525  , keyless_hash_(false)
526  , interleaved_bins_on_gpu_(false)
527  , idx_target_as_key_(0)
528  , group_col_widths_(group_col_widths)
529  , group_col_compact_width_(0)
530  , entry_count_(0)
531  , min_val_(min_val)
532  , max_val_(max_val)
533  , bucket_(0)
534  , has_nulls_(false)
535  , sharing_(GroupByMemSharing::Shared)
536  , sort_on_gpu_(false)
537  , output_columnar_(false)
538  , render_output_(false)
539  , must_use_baseline_sort_(false)
540  , is_table_function_(false)
541  , force_4byte_float_(false) {}
QueryMemoryDescriptor::QueryMemoryDescriptor ( const TResultSetBufferDescriptor &  thrift_query_memory_descriptor)

Member Function Documentation

void QueryMemoryDescriptor::addColSlotInfo ( const std::vector< std::tuple< int8_t, int8_t >> &  slots_for_col)

Definition at line 1090 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumn(), and col_slot_context_.

Referenced by RelAlgExecutor::executeLogicalValues(), TableFunctionExecutionContext::launchCpuCode(), and TableFunctionExecutionContext::launchGpuCode().

1091  {
1092  col_slot_context_.addColumn(slots_for_col);
1093 }

void QueryMemoryDescriptor::alignPaddedSlots ( )

Definition at line 1099 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::alignPaddedSlots(), col_slot_context_, and sortOnGpu().

1099  {
1100   col_slot_context_.alignPaddedSlots(sortOnGpu());
1101  }

bool QueryMemoryDescriptor::blocksShareMemory ( ) const

Definition at line 995 of file QueryMemoryDescriptor.cpp.

References bucket_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, g_cluster, getGroupbyColCount(), GPU, GroupByBaselineHash, GroupByPerfectHash, is_table_function_, many_entries(), max_val_, min_val_, Projection, query_desc_type_, render_output_, and sharedMemBytes().

Referenced by canOutputColumnar(), ResultSetReductionJIT::codegen(), QueryMemoryInitializer::computeNumberOfBuffers(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), and toString().

995  {
996  if (g_cluster || is_table_function_) {
997  return true;
998  }
1000  return true;
1001  }
1002  if (executor_->isCPUOnly() || render_output_ ||
1006  getGroupbyColCount() > 1)) {
1007  return true;
1008  }
1012 }

bool QueryMemoryDescriptor::canOutputColumnar ( ) const

Definition at line 1103 of file QueryMemoryDescriptor.cpp.

References blocksShareMemory(), count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, interleavedBins(), threadsShareMemory(), and usesGetGroupValueFast().

Referenced by QueryMemoryDescriptor().

1103  {
1104   return usesGetGroupValueFast() && threadsShareMemory() && blocksShareMemory() &&
1105   !interleavedBins(ExecutorDeviceType::GPU) &&
1106   countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1107  }

void QueryMemoryDescriptor::clearGroupColWidths ( )
inline

Definition at line 191 of file QueryMemoryDescriptor.h.

References group_col_widths_.

191 { group_col_widths_.clear(); }
void QueryMemoryDescriptor::clearSlotInfo ( )

Definition at line 1095 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::clear(), and col_slot_context_.

1095  {
1096   col_slot_context_.clear();
1097  }


void QueryMemoryDescriptor::clearTargetGroupbyIndices ( )
inline

Definition at line 240 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

240 { target_groupby_indices_.clear(); }
static bool QueryMemoryDescriptor::countDescriptorsLogicallyEmpty ( const CountDistinctDescriptors &  count_distinct_descriptors)
inlinestatic

Definition at line 146 of file QueryMemoryDescriptor.h.

References Invalid.

Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), lazyInitGroups(), and QueryMemoryDescriptor().

147  {
148  return std::all_of(count_distinct_descriptors.begin(),
149  count_distinct_descriptors.end(),
150  [](const CountDistinctDescriptor& desc) {
151  return desc.impl_type_ == CountDistinctImplType::Invalid;
152  });
153  }


bool QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty ( ) const
inline

Definition at line 155 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_, and countDescriptorsLogicallyEmpty().

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem().

155  {
156   return countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
157  }

bool QueryMemoryDescriptor::didOutputColumnar ( ) const
inline

Definition at line 263 of file QueryMemoryDescriptor.h.

References output_columnar_.

Referenced by TargetExprCodegen::codegen(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), GroupByAndAggregate::codegenWindowRowPointer(), copy_projection_buffer_from_gpu_columnar(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), ResultSetStorage::copyKeyColWise(), ResultSet::createComparator(), ResultSet::didOutputColumnar(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), ResultSet::fixupQueryMemoryDescriptor(), get_cols_ptr(), ResultSet::getTargetValueFromBufferColwise(), ResultSetStorage::initializeBaselineValueSlots(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSet::makeGeoTargetValue(), ResultSetStorage::moveOneEntryToBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().


bool QueryMemoryDescriptor::forceFourByteFloat ( ) const
inline

Definition at line 272 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

Referenced by ResultSet::makeTargetValue().


int64_t QueryMemoryDescriptor::getBucket ( ) const
inline

Definition at line 247 of file QueryMemoryDescriptor.h.

References bucket_.

Referenced by GroupByAndAggregate::codegenGroupBy(), and GroupByAndAggregate::codegenSingleColumnPerfectHash().

247 { return bucket_; }


size_t QueryMemoryDescriptor::getBufferColSlotCount ( ) const

Definition at line 975 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and target_groupby_indices_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::advance_col_buff_to_slot(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), and ResultSetStorage::fillOneEntryRowWise().

975  {
976  size_t total_slot_count = col_slot_context_.getSlotCount();
977 
978  if (target_groupby_indices_.empty()) {
979  return total_slot_count;
980  }
981  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
982   target_groupby_indices_.end(),
983   [](const ssize_t i) { return i >= 0; });
984 }

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  thread_count,
const ExecutorDeviceType  device_type 
) const

Definition at line 906 of file QueryMemoryDescriptor.cpp.

References entry_count_, streaming_top_n::get_heap_size(), getRowSize(), SortInfo::limit, SortInfo::offset, output_columnar_, RelAlgExecutionUnit::sort_info, and use_streaming_top_n().

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), getBufferSizeBytes(), and QueryMemoryInitializer::QueryMemoryInitializer().

909  {
910  if (use_streaming_top_n(ra_exe_unit, output_columnar_)) {
911  const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
912  return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
913  }
914  return getBufferSizeBytes(device_type, entry_count_);
915 }

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type) const

Definition at line 952 of file QueryMemoryDescriptor.cpp.

References entry_count_, and getBufferSizeBytes().

953  {
954  return getBufferSizeBytes(device_type, entry_count_);
955 }

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type,
const size_t  entry_count 
) const

Returns the total amount of output buffer memory for each device (CPU/GPU).

Columnar: for projections, it returns the index buffer plus the columnar buffer (all non-lazy columns); for group by, it returns the amount required for each group column (assuming 64 bits per group) plus the columnar buffer (all involved aggregate columns).

Row-wise: returns the required memory per row multiplied by the number of entries.
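
A minimal, self-contained sketch (not part of OmniSciDB; the group-key width, slot widths, and entry count are made-up illustration values) of the row-wise arithmetic described above:

#include <cstddef>

// Illustrative stand-in for the engine's align_to_int64 helper.
constexpr std::size_t align_to_int64(std::size_t n) {
  return (n + 7) & ~static_cast<std::size_t>(7);
}

int main() {
  const std::size_t key_bytes = align_to_int64(1 * 8);       // one 8-byte group key
  const std::size_t slot_bytes = align_to_int64(8 + 4 + 4);  // three padded aggregate slots
  const std::size_t row_bytes = key_bytes + slot_bytes;      // analogue of getRowSize()
  const std::size_t entry_count = 1024;
  const std::size_t buffer_bytes = row_bytes * entry_count;  // row-wise buffer size
  return buffer_bytes == 24 * 1024 ? 0 : 1;                  // 24 KiB in this example
}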

Definition at line 928 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_GE, executor_, getColsSize(), getRowSize(), getTotalBytesOfColumnarBuffers(), group_col_widths_, interleavedBins(), keyless_hash_, output_columnar_, Projection, and query_desc_type_.

929  {
930   if (keyless_hash_ && !output_columnar_) {
931   CHECK_GE(group_col_widths_.size(), size_t(1));
932   auto row_bytes = align_to_int64(getColsSize());
933 
934   return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
935   row_bytes;
936   }
937 
938   constexpr size_t row_index_width = sizeof(int64_t);
939   size_t total_bytes{0};
940   if (output_columnar_) {
941   total_bytes = (query_desc_type_ == QueryDescriptionType::Projection
942   ? row_index_width * entry_count
943   : sizeof(int64_t) * group_col_widths_.size() * entry_count) +
944   getTotalBytesOfColumnarBuffers(entry_count);
945  } else {
946  total_bytes = getRowSize() * entry_count;
947  }
948 
949  return total_bytes;
950 }

size_t QueryMemoryDescriptor::getColCount ( ) const

Definition at line 1052 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColCount().

1052  {
1053  return col_slot_context_.getColCount();
1054 }

size_t QueryMemoryDescriptor::getColOffInBytes ( const size_t  col_idx) const

Definition at line 803 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_EQ, entry_count_, getColOnlyOffInBytes(), getEffectiveKeyWidth(), getPaddedSlotWidthBytes(), getPrependedGroupBufferSizeInBytes(), getWarpCount(), group_col_widths_, keyless_hash_, and output_columnar_.

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenOutputSlot(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), get_cols_ptr(), QueryExecutionContext::groupBufferToDeinterleavedResults(), QueryMemoryInitializer::initGroups(), inplace_sort_gpu(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

803  {
804  const auto warp_count = getWarpCount();
805  if (output_columnar_) {
806  CHECK_EQ(size_t(1), warp_count);
807  size_t offset{0};
808  if (!keyless_hash_) {
810  }
811  for (size_t index = 0; index < col_idx; ++index) {
813  }
814  return offset;
815  }
816 
817  size_t offset{0};
818  if (keyless_hash_) {
819  CHECK_EQ(size_t(1), group_col_widths_.size());
820  } else {
821  offset += group_col_widths_.size() * getEffectiveKeyWidth();
822  offset = align_to_int64(offset);
823  }
824  offset += getColOnlyOffInBytes(col_idx);
825  return offset;
826 }

size_t QueryMemoryDescriptor::getColOffInBytesInNextBin ( const size_t  col_idx) const

Definition at line 861 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, getPaddedSlotWidthBytes(), getRowSize(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryExecutionContext::groupBufferToDeinterleavedResults().

861  {
862  auto warp_count = getWarpCount();
863  if (output_columnar_) {
864  CHECK_EQ(size_t(1), group_col_widths_.size());
865  CHECK_EQ(size_t(1), warp_count);
866  return getPaddedSlotWidthBytes(col_idx);
867  }
868 
869  return warp_count * getRowSize();
870 }

size_t QueryMemoryDescriptor::getColOnlyOffInBytes ( const size_t  col_idx) const

Definition at line 790 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColOnlyOffInBytes().

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), getColOffInBytes(), and ResultSetStorage::reduceSingleRow().

790  {
791  return col_slot_context_.getColOnlyOffInBytes(col_idx);
792 }

const ColSlotContext& QueryMemoryDescriptor::getColSlotContext ( ) const
inline

Definition at line 287 of file QueryMemoryDescriptor.h.

References col_slot_context_.

Referenced by ResultSetStorage::reduceEntriesNoCollisionsColWise(), and ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions().

287 { return col_slot_context_; }


size_t QueryMemoryDescriptor::getColsSize ( ) const

Definition at line 730 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsAlignedPaddedSize().

Referenced by getBufferSizeBytes(), getRowSize(), and QueryExecutionContext::launchCpuCode().

730  {
731   return col_slot_context_.getAllSlotsAlignedPaddedSize();
732  }

size_t QueryMemoryDescriptor::getCompactByteWidth ( ) const

Definition at line 751 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getCompactByteWidth().

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), and anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec().

751  {
752   return col_slot_context_.getCompactByteWidth();
753  }

const CountDistinctDescriptor& QueryMemoryDescriptor::getCountDistinctDescriptor ( const size_t  idx) const
inline
size_t QueryMemoryDescriptor::getCountDistinctDescriptorsSize ( ) const
inline

Definition at line 256 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_.

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::reduceOneCountDistinctSlot(), and ResultSetStorage::reduceOneCountDistinctSlot().

256  {
257  return count_distinct_descriptors_.size();
258  }

size_t QueryMemoryDescriptor::getEntryCount ( ) const
inline

Definition at line 242 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by advance_to_next_columnar_target_buff(), QueryMemoryInitializer::allocateCountDistinctGpuMem(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), Executor::dispatchFragments(), ResultSet::entryCount(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), anonymous_namespace{ResultSetReduction.cpp}::get_matching_group_value_reduction(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSet::getTargetValueFromBufferColwise(), QueryMemoryInitializer::initColumnarGroups(), ResultSetStorage::initializeBaselineValueSlots(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), inplace_sort_gpu(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), ResultSetStorage::moveOneEntryToBuffer(), query_group_by_template_impl(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), and ResultSetStorage::rewriteAggregateBufferOffsets().

242 { return entry_count_; }


const Executor* QueryMemoryDescriptor::getExecutor ( ) const
inline

Definition at line 164 of file QueryMemoryDescriptor.h.

References executor_.

Referenced by anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), ResultSetReductionJIT::codegen(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::getVarlenOrderEntry(), ResultSet::makeGeoTargetValue(), and ResultSet::makeVarlenTargetValue().

164 { return executor_; }


GroupByMemSharing QueryMemoryDescriptor::getGpuMemSharing ( ) const
inline

Definition at line 250 of file QueryMemoryDescriptor.h.

References sharing_.

Referenced by TargetExprCodegen::codegen(), and query_group_by_template_impl().

250 { return sharing_; }


size_t QueryMemoryDescriptor::getKeyCount ( ) const
inline

Definition at line 277 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), and keyless_hash_.

Referenced by anonymous_namespace{Execute.cpp}::permute_storage_columnar().


const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1064 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::logical_size.

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), TargetExprCodegen::codegen(), and ResultSet::getTargetValueFromBufferRowwise().

1065  {
1066  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
1067 }

int64_t QueryMemoryDescriptor::getMaxVal ( ) const
inline

Definition at line 246 of file QueryMemoryDescriptor.h.

References max_val_.

Referenced by GroupByAndAggregate::codegenGroupBy().

246 { return max_val_; }


int64_t QueryMemoryDescriptor::getMinVal ( ) const
inline

Definition at line 245 of file QueryMemoryDescriptor.h.

References min_val_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

245 { return min_val_; }


size_t QueryMemoryDescriptor::getNextColOffInBytes ( const int8_t *  col_ptr,
const size_t  bin,
const size_t  col_idx 
) const

Definition at line 872 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK(), CHECK_EQ, entry_count_, getPaddedSlotWidthBytes(), getSlotCount(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryMemoryInitializer::initColumnPerRow().

874  {
876  size_t offset{0};
877  auto warp_count = getWarpCount();
878  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
879  const auto total_slot_count = getSlotCount();
880  if (col_idx + 1 == total_slot_count) {
881  if (output_columnar_) {
882  return (entry_count_ - bin) * chosen_bytes;
883  } else {
884  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
885  }
886  }
887 
888  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
889  if (output_columnar_) {
890  CHECK_EQ(size_t(1), group_col_widths_.size());
891  CHECK_EQ(size_t(1), warp_count);
892 
893  offset = align_to_int64(entry_count_ * chosen_bytes);
894 
895  offset += bin * (next_chosen_bytes - chosen_bytes);
896  return offset;
897  }
898 
899  if (next_chosen_bytes == sizeof(int64_t)) {
900  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
901  } else {
902  return chosen_bytes;
903  }
904 }

size_t QueryMemoryDescriptor::getPaddedColWidthForRange ( const size_t  offset,
const size_t  range 
) const
inline

Definition at line 205 of file QueryMemoryDescriptor.h.

References getPaddedSlotWidthBytes().

Referenced by get_byteoff_of_slot(), and ResultSet::makeGeoTargetValue().

205  {
206  size_t ret = 0;
207  for (size_t i = offset; i < offset + range; i++) {
208  ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
209  }
210  return ret;
211  }

const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1060 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::padded_size.

Referenced by advance_target_ptr_row_wise(), advance_to_next_columnar_target_buff(), TargetExprCodegen::codegen(), GroupByAndAggregate::codegenOutputSlot(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), get_width_for_slot(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getPaddedColWidthForRange(), ResultSet::getPaddedSlotWidthBytes(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnPerRow(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSet::makeGeoTargetValue(), TargetExprCodegenBuilder::operator()(), AggregateReductionEgress< META_TYPE_CLASS >::operator()(), AggregateReductionEgress< Experimental::MetaTypeClass< Experimental::Geometry > >::operator()(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneAggregateSlot(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

1060  {
1061  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
1062 }

size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes ( ) const

Definition at line 850 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK(), getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by getColOffInBytes().

850  {
851   CHECK(output_columnar_);
852   size_t buffer_size{0};
853  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
854  buffer_size += align_to_int64(
855  std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
856  getEntryCount());
857  }
858  return buffer_size;
859 }

size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes ( const size_t  group_idx) const

Definition at line 832 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK(), getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by ResultSetStorage::copyKeyColWise(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

833  {
834   CHECK(output_columnar_);
835   CHECK(group_idx < getGroupbyColCount());
836  size_t offset{0};
837  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
838  // TODO(Saman): relax that int64_bit part immediately
839  offset += align_to_int64(
840  std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
841  getEntryCount());
842  }
843  return offset;
844 }

QueryDescriptionType QueryMemoryDescriptor::getQueryDescriptionType ( ) const
inline

Definition at line 166 of file QueryMemoryDescriptor.h.

References query_desc_type_.

Referenced by ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), Executor::collectAllDeviceResults(), copy_projection_buffer_from_gpu_columnar(), Executor::dispatchFragments(), ResultSet::getQueryDescriptionType(), init_agg_val_vec(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), isSingleColumnGroupByWithPerfectHash(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), TargetExprCodegenBuilder::operator()(), ResultSetStorage::reduce(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx(), Executor::ExecutionDispatch::run(), Executor::ExecutionDispatch::runImpl(), target_exprs_to_infos(), and ResultSet::updateStorageEntryCount().

166 { return query_desc_type_; }

std::unique_ptr< QueryExecutionContext > QueryMemoryDescriptor::getQueryExecutionContext ( const RelAlgExecutionUnit &  ra_exe_unit,
const Executor *  executor,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const int  device_id,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const bool  output_columnar,
const bool  sort_on_gpu,
RenderInfo *  render_info 
) const

Definition at line 610 of file QueryMemoryDescriptor.cpp.

References DEBUG_TIMER, and QueryExecutionContext.

Referenced by Executor::ExecutionDispatch::runImpl().

622  {
623  auto timer = DEBUG_TIMER(__func__);
624  if (frag_offsets.empty()) {
625  return nullptr;
626  }
627  return std::unique_ptr<QueryExecutionContext>(
628  new QueryExecutionContext(ra_exe_unit,
629  *this,
630  executor,
631  device_type,
632  dispatch_mode,
633  device_id,
634  num_rows,
635  col_buffers,
636  frag_offsets,
637  row_set_mem_owner,
638  output_columnar,
639  sort_on_gpu,
640  render_info));
641 }

size_t QueryMemoryDescriptor::getRowSize ( ) const

Definition at line 734 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK(), CHECK_EQ, getColsSize(), getEffectiveKeyWidth(), group_col_widths_, keyless_hash_, and output_columnar_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), getBufferSizeBytes(), getColOffInBytesInNextBin(), QueryMemoryInitializer::initGroups(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduceSingleRow(), and sharedMemBytes().

734  {
735   CHECK(!output_columnar_);
736   size_t total_bytes{0};
737  if (keyless_hash_) {
738  CHECK_EQ(size_t(1), group_col_widths_.size());
739  } else {
740  total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
741  total_bytes = align_to_int64(total_bytes);
742  }
743  total_bytes += getColsSize();
744  return align_to_int64(total_bytes);
745 }

size_t QueryMemoryDescriptor::getRowWidth ( ) const

Definition at line 1080 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsPaddedSize().

Referenced by get_row_bytes().

1080  {
1081   // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
1082   return col_slot_context_.getAllSlotsPaddedSize();
1083  }

size_t QueryMemoryDescriptor::getSlotCount ( ) const
const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol ( const size_t  col_idx) const

Definition at line 1069 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, col_slot_context_, and ColSlotContext::getSlotsForCol().

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers().

1070  {
1071  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
1072  CHECK_EQ(col_slots.size(), size_t(1));
1073  return col_slots.front();
1074 }

ssize_t QueryMemoryDescriptor::getTargetGroupbyIndex ( const size_t  target_idx) const
inline

Definition at line 224 of file QueryMemoryDescriptor.h.

References CHECK_LT, and target_groupby_indices_.

Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().

224  {
225  CHECK_LT(target_idx, target_groupby_indices_.size());
226  return target_groupby_indices_[target_idx];
227  }

int32_t QueryMemoryDescriptor::getTargetIdxForKey ( ) const
inline

Definition at line 179 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

Referenced by ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), query_group_by_template_impl(), ResultSetStorage::reduceSingleRow(), and reductionKey().

179 { return idx_target_as_key_; }


size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( ) const
private

Returns the maximum total number of bytes (including required padding) needed to store all non-lazy columns' results for columnar cases.
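
For example (illustrative figures only, assuming an entry count of 1,000 and two non-lazy output columns with padded widths of 8 and 4 bytes), this would be roughly 1,000 * 8 + 1,000 * 4 bytes, plus any alignment padding between the per-column buffers.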

Definition at line 760 of file QueryMemoryDescriptor.cpp.

References CHECK(), col_slot_context_, entry_count_, ColSlotContext::getTotalBytesOfColumnarBuffers(), and output_columnar_.

Referenced by getBufferSizeBytes(), and getTotalBytesOfColumnarProjections().

760  {
761   CHECK(output_columnar_);
762   return col_slot_context_.getTotalBytesOfColumnarBuffers(entry_count_);
763  }

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( const size_t  num_entries_per_column) const
private

This is a helper function that returns the total number of bytes (including required padding) needed to store all non-lazy columns' results for columnar cases.

Definition at line 769 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getTotalBytesOfColumnarBuffers().

770  {
771  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
772 }

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections ( const size_t  projection_count) const
private

Returns the effective total number of bytes for columnar projections, which includes 1) the total number of bytes used to store all non-lazy columns and 2) the total number of bytes used to store row indices (for lazy fetches, etc.).

NOTE: this function does not represent the buffer size dedicated to the results, but the memory required to fill all valid results into a compact new buffer (with no holes in it).
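
For example (illustrative figures only), compacting 1,000 valid projected rows with 12 bytes of non-lazy column storage per row would require 1,000 * 12 bytes for the columns plus 1,000 * 8 bytes of row indices, roughly 20 KB in total.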

Definition at line 783 of file QueryMemoryDescriptor.cpp.

References getTotalBytesOfColumnarBuffers().

784  {
785  constexpr size_t row_index_width = sizeof(int64_t);
786  return getTotalBytesOfColumnarBuffers(projection_count) +
787  row_index_width * projection_count;
788 }

size_t QueryMemoryDescriptor::getWarpCount ( ) const

Definition at line 747 of file QueryMemoryDescriptor.cpp.

References executor_, and interleaved_bins_on_gpu_.

Referenced by getColOffInBytes(), getColOffInBytesInNextBin(), and getNextColOffInBytes().

747  {
748  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
749 }


int8_t QueryMemoryDescriptor::groupColWidth ( const size_t  key_idx) const
inline

Definition at line 182 of file QueryMemoryDescriptor.h.

References CHECK_LT, and group_col_widths_.

Referenced by ResultSetStorage::copyKeyColWise(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

182  {
183  CHECK_LT(key_idx, group_col_widths_.size());
184  return group_col_widths_[key_idx];
185  }

const auto QueryMemoryDescriptor::groupColWidthsBegin ( ) const
inline

Definition at line 189 of file QueryMemoryDescriptor.h.

References group_col_widths_.

189 { return group_col_widths_.begin(); }
const auto QueryMemoryDescriptor::groupColWidthsEnd ( ) const
inline

Definition at line 190 of file QueryMemoryDescriptor.h.

References group_col_widths_.

190 { return group_col_widths_.end(); }
bool QueryMemoryDescriptor::hasInterleavedBinsOnGpu ( ) const
inline

Definition at line 176 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

bool QueryMemoryDescriptor::hasNulls ( ) const
inline

Definition at line 249 of file QueryMemoryDescriptor.h.

References has_nulls_.

Referenced by GroupByAndAggregate::codegenGroupBy().

249 { return has_nulls_; }


std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptor::init ( const Executor *  executor,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const ColRangeInfo &  col_range_info,
const KeylessInfo &  keyless_info,
const bool  allow_multifrag,
const ExecutorDeviceType  device_type,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
const size_t  shard_count,
const size_t  max_groups_buffer_entry_count,
RenderInfo *  render_info,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
static

Definition at line 150 of file QueryMemoryDescriptor.cpp.

References get_col_byte_widths(), and RelAlgExecutionUnit::groupby_exprs.

165  {
166  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs, {});
167  const bool is_group_by{!group_col_widths.empty()};
168 
169  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
170 
171  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
172  ra_exe_unit, query_infos, crt_min_byte_width);
173 
174  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
175  col_slot_context.validate();
176 
177  if (!is_group_by) {
178  CHECK(!must_use_baseline_sort);
179 
180  return std::make_unique<QueryMemoryDescriptor>(
181  executor,
182  ra_exe_unit,
183  query_infos,
184  allow_multifrag,
185  false,
186  false,
187  -1,
188  ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
190  0,
191  0,
192  0,
193  false},
194  col_slot_context,
195  std::vector<int8_t>{},
196  /*group_col_compact_width*/ 0,
197  std::vector<ssize_t>{},
198  /*entry_count*/ 1,
200  false,
201  count_distinct_descriptors,
202  false,
203  output_columnar_hint,
204  render_info && render_info->isPotentialInSituRender(),
205  must_use_baseline_sort);
206  }
207 
208  size_t entry_count = 1;
209  auto actual_col_range_info = col_range_info;
210  auto sharing = GroupByMemSharing::Shared;
211  bool interleaved_bins_on_gpu = false;
212  bool keyless_hash = false;
213  bool shared_mem_for_group_by = false;
214  int8_t group_col_compact_width = 0;
215  int32_t idx_target_as_key = -1;
216  auto output_columnar = output_columnar_hint;
217  std::vector<ssize_t> target_groupby_indices;
218 
219  switch (col_range_info.hash_type_) {
221  if (render_info) {
222  render_info->setInSituDataIfUnset(false);
223  }
224 
225  if (group_col_widths.size() > 1) {
226  // col range info max contains the expected cardinality of the output
227  entry_count = static_cast<size_t>(actual_col_range_info.max);
228  actual_col_range_info.bucket = 0;
229  } else {
230  // single column perfect hash
231  idx_target_as_key = keyless_info.target_index;
232  keyless_hash =
233  (!sort_on_gpu_hint ||
235  col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
236  !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
237  entry_count = std::max(
238  GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
239  const size_t interleaved_max_threshold{512};
240 
241  size_t gpu_smem_max_threshold{0};
242  if (device_type == ExecutorDeviceType::GPU) {
243  const auto cuda_mgr = executor->getCatalog()->getDataMgr().getCudaMgr();
244  CHECK(cuda_mgr);
245  /*
246  * We only use shared memory strategy if GPU hardware provides native shared
247  *memory atomics support. From CUDA Toolkit documentation:
248  *https://docs.nvidia.com/cuda/pascal-tuning-guide/index.html#atomic-ops "Like
249  *Maxwell, Pascal [and Volta] provides native shared memory atomic operations
250  *for 32-bit integer arithmetic, along with native 32 or 64-bit compare-and-swap
251  *(CAS)."
252  *
253  **/
254  if (cuda_mgr->isArchMaxwellOrLaterForAll()) {
255  // TODO(Saman): threshold should be eventually set as an optimized policy per
256  // architecture.
257  gpu_smem_max_threshold =
258  std::min((cuda_mgr->isArchVoltaForAll()) ? 4095LU : 2047LU,
259  (cuda_mgr->getMaxSharedMemoryForAll() / sizeof(int64_t) - 1));
260  }
261  }
262 
263  if (must_use_baseline_sort) {
264  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
265  ra_exe_unit.target_exprs);
266  col_slot_context =
267  ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
268  }
269 
270  const auto group_expr = ra_exe_unit.groupby_exprs.front().get();
271  shared_mem_for_group_by =
272  g_enable_smem_group_by && keyless_hash && keyless_info.shared_mem_support &&
273  (entry_count <= gpu_smem_max_threshold) &&
274  GroupByAndAggregate::supportedExprForGpuSharedMemUsage(group_expr) &&
275  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
276  count_distinct_descriptors) &&
277  !output_columnar; // TODO(Saman): add columnar support with the new smem
278  // support.
279 
280  bool has_varlen_sample_agg = false;
281  for (const auto& target_expr : ra_exe_unit.target_exprs) {
282  if (target_expr->get_contains_agg()) {
283  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
284  CHECK(agg_expr);
285  if (agg_expr->get_aggtype() == kSAMPLE &&
286  agg_expr->get_type_info().is_varlen()) {
287  has_varlen_sample_agg = true;
288  break;
289  }
290  }
291  }
292 
293  interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
294  (entry_count <= interleaved_max_threshold) &&
295  (device_type == ExecutorDeviceType::GPU) &&
296  QueryMemoryDescriptor::countDescriptorsLogicallyEmpty(
297  count_distinct_descriptors) &&
298  !output_columnar;
299  }
300  break;
301  }
302  case QueryDescriptionType::GroupByBaselineHash: {
303  if (render_info) {
304  render_info->setInSituDataIfUnset(false);
305  }
306  entry_count = shard_count
307  ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
308  : max_groups_buffer_entry_count;
309  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
310  ra_exe_unit.target_exprs);
311  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
312 
313  group_col_compact_width =
314  output_columnar ? 8
315  : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
316 
317  actual_col_range_info =
318  {QueryDescriptionType::GroupByBaselineHash, 0, 0, 0, false};
319  break;
320  }
321  case QueryDescriptionType::Projection: {
322  CHECK(!must_use_baseline_sort);
323 
324  if (use_streaming_top_n(ra_exe_unit, output_columnar)) {
325  entry_count = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
326  } else {
327  if (ra_exe_unit.use_bump_allocator) {
328  output_columnar = false;
329  entry_count = 0;
330  } else {
331  entry_count = ra_exe_unit.scan_limit
332  ? static_cast<size_t>(ra_exe_unit.scan_limit)
333  : max_groups_buffer_entry_count;
334  }
335  }
336 
337  const auto catalog = executor->getCatalog();
338  CHECK(catalog);
339  target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
340  ? target_expr_proj_indices(ra_exe_unit, *catalog)
341  : std::vector<ssize_t>{};
342 
343  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
344  break;
345  }
346  default:
347  UNREACHABLE() << "Unknown query type";
348  }
349 
350  return std::make_unique<QueryMemoryDescriptor>(
351  executor,
352  ra_exe_unit,
353  query_infos,
354  allow_multifrag,
355  keyless_hash,
356  interleaved_bins_on_gpu,
357  idx_target_as_key,
358  actual_col_range_info,
359  col_slot_context,
360  group_col_widths,
361  group_col_compact_width,
362  target_groupby_indices,
363  entry_count,
364  sharing,
365  shared_mem_for_group_by,
366  count_distinct_descriptors,
367  sort_on_gpu_hint,
368  output_columnar,
369  render_info && render_info->isPotentialInSituRender(),
370  must_use_baseline_sort);
371 }
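
For the Projection branch above, the output buffer size reduces to simple arithmetic. A minimal illustration with hypothetical sort_info values (not taken from any real query):

  // Streaming top-n: the buffer only needs offset + limit entries.
  const size_t offset = 20;
  const size_t limit = 100;
  const size_t entry_count = offset + limit;  // 120 slots reserved up front
  // With ra_exe_unit.use_bump_allocator the count instead starts at 0 and the
  // buffer grows on demand; otherwise scan_limit (if set) or
  // max_groups_buffer_entry_count is used.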

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::interleavedBins ( const ExecutorDeviceType  device_type) const

Definition at line 1019 of file QueryMemoryDescriptor.cpp.

References GPU, and interleaved_bins_on_gpu_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), getBufferSizeBytes(), QueryExecutionContext::groupBufferToResults(), and QueryMemoryInitializer::QueryMemoryInitializer().

1019  {
1020  return interleaved_bins_on_gpu_ && device_type == ExecutorDeviceType::GPU;
1021 }
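
A hedged usage sketch (query_mem_desc is an assumed local; the per-warp remark describes the intent of interleaving, not a quote from the callers listed above):

  if (query_mem_desc.interleavedBins(ExecutorDeviceType::GPU)) {
    // keyless aggregate bins are replicated so each warp updates its own copy,
    // to be merged during reduction
  }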

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isGroupBy ( ) const
inline

Definition at line 193 of file QueryMemoryDescriptor.h.

References group_col_widths_.

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec(), QueryMemoryInitializer::initColumnPerRow(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

193 { return !group_col_widths_.empty(); }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed ( ) const

Definition at line 968 of file QueryMemoryDescriptor.cpp.

References g_cluster, output_columnar_, Projection, and query_desc_type_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(), TargetExprCodegenBuilder::codegenSlotEmptyKey(), anonymous_namespace{OutputBufferInitialization.cpp}::init_agg_val_vec(), ResultSet::makeTargetValue(), QueryMemoryDescriptor(), ResultSetStorage::reduceOneSlot(), and setOutputColumnar().

968  {
969  // In distributed mode, result sets are serialized using rowwise iterators, so we use
970  // consistent slot widths for now
971  return output_columnar_ && !g_cluster &&
972  query_desc_type_ == QueryDescriptionType::Projection;
973 }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash ( ) const
inline

Definition at line 168 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), getQueryDescriptionType(), and GroupByPerfectHash.

Referenced by GroupByAndAggregate::codegenGroupBy(), and ResultSet::getTargetValueFromBufferRowwise().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isWarpSyncRequired ( const ExecutorDeviceType  device_type) const

Definition at line 1041 of file QueryMemoryDescriptor.cpp.

References CHECK(), executor_, and GPU.

Referenced by query_group_by_template_impl().

1042  {
1043  if (device_type != ExecutorDeviceType::GPU) {
1044  return false;
1045  } else {
1046  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
1047  CHECK(cuda_mgr);
1048  return cuda_mgr->isArchVoltaForAll();
1049  }
1050 }
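
A minimal usage sketch, assuming a query_mem_desc instance is in scope; the comment describes the intent of the flag rather than the exact template emitted by query_group_by_template_impl():

  if (query_mem_desc.isWarpSyncRequired(ExecutorDeviceType::GPU)) {
    // all devices report the Volta architecture: use the warp-synchronized
    // variant of the group-by kernel
  }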

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::lazyInitGroups ( const ExecutorDeviceType  device_type) const

Definition at line 1014 of file QueryMemoryDescriptor.cpp.

References count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, and render_output_.

Referenced by create_dev_group_by_buffers(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().

1014  {
1015  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
1016  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1017 }
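
A hedged call-site sketch (query_mem_desc is an assumed local):

  // Group buffers only need eager (host-side) initialization when lazy GPU
  // initialization is unavailable, i.e. for render output or when count-distinct
  // descriptors are present.
  const bool init_groups_on_host =
      !query_mem_desc.lazyInitGroups(ExecutorDeviceType::GPU);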

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static bool QueryMemoryDescriptor::many_entries ( const int64_t  max_val,
const int64_t  min_val,
const int64_t  bucket 
)
inline static

Definition at line 140 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory().

142  {
143  return max_val - min_val > 10000 * std::max(bucket, int64_t(1));
144  }
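
An illustrative call with made-up range values:

  // 50000 - 0 > 10000 * std::max(int64_t(1), int64_t(1)), so the range counts
  // as having "many entries".
  const bool large_range =
      QueryMemoryDescriptor::many_entries(/*max_val=*/50000, /*min_val=*/0, /*bucket=*/1);  // true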

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::mustUseBaselineSort ( ) const
inline

Definition at line 268 of file QueryMemoryDescriptor.h.

References must_use_baseline_sort_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::operator== ( const QueryMemoryDescriptor &  other) const

Definition at line 543 of file QueryMemoryDescriptor.cpp.

References bucket_, col_slot_context_, count_distinct_descriptors_, force_4byte_float_, group_col_compact_width_, group_col_widths_, has_nulls_, idx_target_as_key_, interleaved_bins_on_gpu_, keyless_hash_, max_val_, min_val_, output_columnar_, query_desc_type_, sharing_, sort_on_gpu_, and target_groupby_indices_.

543  {
544  // Note that this method does not check ptr reference members (e.g. executor_) or
545  // entry_count_
546  if (query_desc_type_ != other.query_desc_type_) {
547  return false;
548  }
549  if (keyless_hash_ != other.keyless_hash_) {
550  return false;
551  }
552  if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
553  return false;
554  }
555  if (idx_target_as_key_ != other.idx_target_as_key_) {
556  return false;
557  }
558  if (force_4byte_float_ != other.force_4byte_float_) {
559  return false;
560  }
561  if (group_col_widths_ != other.group_col_widths_) {
562  return false;
563  }
564  if (group_col_compact_width_ != other.group_col_compact_width_) {
565  return false;
566  }
567  if (target_groupby_indices_ != other.target_groupby_indices_) {
568  return false;
569  }
570  if (min_val_ != other.min_val_) {
571  return false;
572  }
573  if (max_val_ != other.max_val_) {
574  return false;
575  }
576  if (bucket_ != other.bucket_) {
577  return false;
578  }
579  if (has_nulls_ != other.has_nulls_) {
580  return false;
581  }
582  if (sharing_ != other.sharing_) {
583  return false;
584  }
585  if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
586  return false;
587  } else {
588  // Count distinct descriptors can legitimately differ in device only.
589  for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
590  auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
591  auto count_distinct_desc = count_distinct_descriptors_[i];
592  count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
593  if (ref_count_distinct_desc != count_distinct_desc) {
594  return false;
595  }
596  }
597  }
598  if (sort_on_gpu_ != other.sort_on_gpu_) {
599  return false;
600  }
601  if (output_columnar_ != other.output_columnar_) {
602  return false;
603  }
604  if (col_slot_context_ != other.col_slot_context_) {
605  return false;
606  }
607  return true;
608 }
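
Because entry_count_ and pointer members are deliberately excluded, two descriptors that differ only in entry count still compare equal. A minimal sketch using the default constructor and setEntryCount():

  QueryMemoryDescriptor a;
  QueryMemoryDescriptor b = a;
  b.setEntryCount(2048);  // differs from `a` only in entry_count_
  CHECK(a == b);          // entry_count_ is not part of operator==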
int8_t QueryMemoryDescriptor::pick_target_compact_width ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const int8_t  crt_min_byte_width 
)
static

Definition at line 643 of file QueryMemoryDescriptor.cpp.

References CHECK(), CHECK_EQ, g_bigint_count, get_col_byte_widths(), Analyzer::UOper::get_operand(), Analyzer::Expr::get_type_info(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, anonymous_namespace{QueryMemoryDescriptor.cpp}::is_int_and_no_bigger_than(), kCOUNT, kENCODING_DICT, kUNNEST, and RelAlgExecutionUnit::target_exprs.

646  {
647  if (g_bigint_count) {
648  return sizeof(int64_t);
649  }
650  int8_t compact_width{0};
651  auto col_it = ra_exe_unit.input_col_descs.begin();
652  int unnest_array_col_id{std::numeric_limits<int>::min()};
653  for (const auto groupby_expr : ra_exe_unit.groupby_exprs) {
654  const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
655  if (uoper && uoper->get_optype() == kUNNEST) {
656  const auto& arg_ti = uoper->get_operand()->get_type_info();
657  CHECK(arg_ti.is_array());
658  const auto& elem_ti = arg_ti.get_elem_type();
659  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
660  unnest_array_col_id = (*col_it)->getColId();
661  } else {
662  compact_width = crt_min_byte_width;
663  break;
664  }
665  }
666  ++col_it;
667  }
668  if (!compact_width &&
669  (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
670  compact_width = crt_min_byte_width;
671  }
672  if (!compact_width) {
673  col_it = ra_exe_unit.input_col_descs.begin();
674  std::advance(col_it, ra_exe_unit.groupby_exprs.size());
675  for (const auto target : ra_exe_unit.target_exprs) {
676  const auto& ti = target->get_type_info();
677  const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
678  if (agg && agg->get_arg()) {
679  compact_width = crt_min_byte_width;
680  break;
681  }
682 
683  if (agg) {
684  CHECK_EQ(kCOUNT, agg->get_aggtype());
685  CHECK(!agg->get_is_distinct());
686  ++col_it;
687  continue;
688  }
689 
690  if (is_int_and_no_bigger_than(ti, 4) ||
691  (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
692  ++col_it;
693  continue;
694  }
695 
696  const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
697  if (uoper && uoper->get_optype() == kUNNEST &&
698  (*col_it)->getColId() == unnest_array_col_id) {
699  const auto arg_ti = uoper->get_operand()->get_type_info();
700  CHECK(arg_ti.is_array());
701  const auto& elem_ti = arg_ti.get_elem_type();
702  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
703  ++col_it;
704  continue;
705  }
706  }
707 
708  compact_width = crt_min_byte_width;
709  break;
710  }
711  }
712  if (!compact_width) {
713  size_t total_tuples{0};
714  for (const auto& qi : query_infos) {
715  total_tuples += qi.info.getNumTuples();
716  }
717  return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
718  unnest_array_col_id != std::numeric_limits<int>::min()
719  ? 4
720  : crt_min_byte_width;
721  } else {
722  // TODO(miyu): relax this condition to allow more cases just w/o padding
723  for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs, {})) {
724  compact_width = std::max(compact_width, wid);
725  }
726  return compact_width;
727  }
728 }
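
A hedged call sketch (ra_exe_unit and query_infos are assumed to be in scope):

  const int8_t width = QueryMemoryDescriptor::pick_target_compact_width(
      ra_exe_unit, query_infos, /*crt_min_byte_width=*/8);
  // width is 8 whenever g_bigint_count is set; otherwise it can drop to 4 when
  // the group-by keys, aggregate arguments, and total tuple count all fit in
  // 32 bits.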

+ Here is the call graph for this function:

std::string QueryMemoryDescriptor::reductionKey ( ) const

Definition at line 1155 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::boolToString(), col_slot_context_, getEffectiveKeyWidth(), getGroupbyColCount(), getTargetGroupbyIndex(), getTargetIdxForKey(), join(), keyless_hash_, query_desc_type_, anonymous_namespace{QueryMemoryDescriptor.cpp}::queryDescTypeToString(), targetGroupbyIndicesSize(), to_string(), and ColSlotContext::toString().

Referenced by ResultSetReductionJIT::cacheKey(), and toString().

1155  {
1156  std::string str;
1157  str += "Query Memory Descriptor State\n";
1158  str += "\tQuery Type: " + queryDescTypeToString(query_desc_type_) + "\n";
1159  str +=
1160  "\tKeyless Hash: " + boolToString(keyless_hash_) +
1161  (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
1162  : "") +
1163  "\n";
1164  str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
1165  str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
1166  const auto group_indices_size = targetGroupbyIndicesSize();
1167  if (group_indices_size) {
1168  std::vector<std::string> group_indices_strings;
1169  for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
1170  group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
1171  }
1172  str += "\tTarget group by indices: " +
1173  boost::algorithm::join(group_indices_strings, ",");
1174  }
1175  str += "\t" + col_slot_context_.toString();
1176  return str;
1177 }
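
A brief usage sketch (query_mem_desc is an assumed local); the caching remark paraphrases the "Referenced by" list above:

  // The key is a stable textual summary of the buffer layout, so it can be used
  // (as in ResultSetReductionJIT::cacheKey()) to cache generated reduction code.
  // toString() reuses it as a prefix and appends runtime-oriented details.
  const std::string key = query_mem_desc.reductionKey();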

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::resetGroupColWidths ( const std::vector< int8_t > &  new_group_col_widths)
inline protected

Definition at line 324 of file QueryMemoryDescriptor.h.

References group_col_widths_.

324  {
325  group_col_widths_ = new_group_col_widths;
326  }
void QueryMemoryDescriptor::setAllTargetGroupbyIndices ( std::vector< ssize_t >  group_by_indices)
inline

Definition at line 229 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

229  {
230  target_groupby_indices_ = group_by_indices;
231  }
void QueryMemoryDescriptor::setEntryCount ( const size_t  val)
inline

Definition at line 243 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by Executor::reduceMultiDeviceResultSets(), ResultSetStorage::updateEntryCount(), and ResultSet::updateStorageEntryCount().

243 { entry_count_ = val; }

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setForceFourByteFloat ( const bool  val)
inline

Definition at line 273 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

void QueryMemoryDescriptor::setGroupColCompactWidth ( const int8_t  val)
inline

Definition at line 195 of file QueryMemoryDescriptor.h.

References group_col_compact_width_.

void QueryMemoryDescriptor::setHasInterleavedBinsOnGpu ( const bool  val)
inline

Definition at line 177 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

void QueryMemoryDescriptor::setHasKeylessHash ( const bool  val)
inline

Definition at line 174 of file QueryMemoryDescriptor.h.

References keyless_hash_.

void QueryMemoryDescriptor::setOutputColumnar ( const bool  val)

Definition at line 957 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, isLogicalSizedColumnsAllowed(), output_columnar_, and ColSlotContext::setAllSlotsPaddedSizeToLogicalSize().

Referenced by TableFunctionExecutionContext::launchCpuCode(), and TableFunctionExecutionContext::launchGpuCode().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setQueryDescriptionType ( const QueryDescriptionType  val)
inline

Definition at line 167 of file QueryMemoryDescriptor.h.

References query_desc_type_.

167 { query_desc_type_ = val; }
void QueryMemoryDescriptor::setTargetIdxForKey ( const int32_t  val)
inline

Definition at line 180 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

size_t QueryMemoryDescriptor::sharedMemBytes ( const ExecutorDeviceType  device_type) const

Definition at line 1023 of file QueryMemoryDescriptor.cpp.

References CHECK(), CHECK_EQ, CPU, entry_count_, executor_, getRowSize(), GPU, SharedForKeylessOneColumnKnownRange, and sharing_.

Referenced by blocksShareMemory(), QueryExecutionContext::launchGpuCode(), and query_group_by_template_impl().

1023  {
1024  CHECK(device_type == ExecutorDeviceType::CPU || device_type == ExecutorDeviceType::GPU);
1025  if (device_type == ExecutorDeviceType::CPU) {
1026  return 0;
1027  }
1028  // if performing keyless aggregate query with a single column group-by:
1029  if (sharing_ == GroupByMemSharing::SharedForKeylessOneColumnKnownRange) {
1030  CHECK_EQ(getRowSize(),
1031  sizeof(int64_t)); // Currently just designed for this scenario
1032  size_t shared_mem_size =
1033  (/*bin_count=*/entry_count_ + 1) * sizeof(int64_t); // one extra for NULL values
1034  CHECK(shared_mem_size <=
1035  executor_->getCatalog()->getDataMgr().getCudaMgr()->getMaxSharedMemoryForAll());
1036  return shared_mem_size;
1037  }
1038  return 0;
1039 }
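
For the keyless one-column case the size is simple arithmetic; an illustrative computation with a hypothetical entry count:

  // entry_count_ == 1023  =>  (1023 + 1) * sizeof(int64_t) = 8192 bytes,
  // the extra bin holding the NULL key. CPU execution always returns 0.
  const size_t smem_bytes = (1023 + 1) * sizeof(int64_t);  // 8192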

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::sortOnGpu ( ) const
inline

Definition at line 260 of file QueryMemoryDescriptor.h.

References sort_on_gpu_.

Referenced by alignPaddedSlots(), QueryExecutionContext::launchGpuCode(), Executor::ExecutionDispatch::runImpl(), and use_speculative_top_n().

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::targetGroupbyIndicesSize ( ) const
inline
size_t QueryMemoryDescriptor::targetGroupbyNegativeIndicesSize ( ) const
inline

Definition at line 234 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

234  {
235  return std::count_if(
236  target_groupby_indices_.begin(),
237  target_groupby_indices_.end(),
238  [](const ssize_t& target_group_by_index) { return target_group_by_index < 0; });
239  }
bool QueryMemoryDescriptor::threadsShareMemory ( ) const
std::string QueryMemoryDescriptor::toString ( ) const

Definition at line 1135 of file QueryMemoryDescriptor.cpp.

References allow_multifrag_, blocksShareMemory(), anonymous_namespace{QueryMemoryDescriptor.cpp}::boolToString(), bucket_, entry_count_, GPU, interleaved_bins_on_gpu_, lazyInitGroups(), max_val_, min_val_, must_use_baseline_sort_, output_columnar_, reductionKey(), render_output_, sort_on_gpu_, threadsShareMemory(), to_string(), and usesGetGroupValueFast().

Referenced by Executor::dispatchFragments().

1135  {
1136  auto str = reductionKey();
1137  str += "\tAllow Multifrag: " + boolToString(allow_multifrag_) + "\n";
1138  str += "\tInterleaved Bins on GPU: " + boolToString(interleaved_bins_on_gpu_) + "\n";
1139  str += "\tBlocks Share Memory: " + boolToString(blocksShareMemory()) + "\n";
1140  str += "\tThreads Share Memory: " + boolToString(threadsShareMemory()) + "\n";
1141  str += "\tUses Fast Group Values: " + boolToString(usesGetGroupValueFast()) + "\n";
1142  str += "\tLazy Init Groups (GPU): " +
1143  boolToString(lazyInitGroups(ExecutorDeviceType::GPU)) + "\n";
1144  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
1145  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
1146  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
1147  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
1148  str += "\tSort on GPU: " + boolToString(sort_on_gpu_) + "\n";
1149  str += "\tOutput Columnar: " + boolToString(output_columnar_) + "\n";
1150  str += "\tRender Output: " + boolToString(render_output_) + "\n";
1151  str += "\tUse Baseline Sort: " + boolToString(must_use_baseline_sort_) + "\n";
1152  return str;
1153 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static TResultSetBufferDescriptor QueryMemoryDescriptor::toThrift ( const QueryMemoryDescriptor & )
static
int8_t QueryMemoryDescriptor::updateActualMinByteWidth ( const int8_t  actual_min_byte_width) const

Definition at line 1085 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getMinPaddedByteSize().

1086  {
1087  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
1088 }

+ Here is the call graph for this function:

void QueryMemoryDescriptor::useConsistentSlotWidthSize ( const int8_t  slot_width_size)

Definition at line 1076 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setAllSlotsSize().

1076  {
1077  col_slot_context_.setAllSlotsSize(slot_width_size);
1078 }

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::usesGetGroupValueFast ( ) const

Definition at line 986 of file QueryMemoryDescriptor.cpp.

References getGroupbyColCount(), GroupByPerfectHash, and query_desc_type_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), and toString().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

template<typename META_CLASS_TYPE >
friend class AggregateReductionEgress
friend

Definition at line 369 of file QueryMemoryDescriptor.h.

friend class QueryExecutionContext
friend

Definition at line 366 of file QueryMemoryDescriptor.h.

Referenced by getQueryExecutionContext().

friend class ResultSet
friend

Definition at line 365 of file QueryMemoryDescriptor.h.

Member Data Documentation

bool QueryMemoryDescriptor::allow_multifrag_
private

Definition at line 330 of file QueryMemoryDescriptor.h.

Referenced by toString().

int64_t QueryMemoryDescriptor::bucket_
private

Definition at line 347 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getBucket(), operator==(), and toString().

size_t QueryMemoryDescriptor::entry_count_
private
const Executor* QueryMemoryDescriptor::executor_
private
bool QueryMemoryDescriptor::force_4byte_float_
private

Definition at line 357 of file QueryMemoryDescriptor.h.

Referenced by forceFourByteFloat(), operator==(), and setForceFourByteFloat().

int8_t QueryMemoryDescriptor::group_col_compact_width_
private
bool QueryMemoryDescriptor::has_nulls_
private

Definition at line 348 of file QueryMemoryDescriptor.h.

Referenced by hasNulls(), and operator==().

int32_t QueryMemoryDescriptor::idx_target_as_key_
private

Definition at line 334 of file QueryMemoryDescriptor.h.

Referenced by getTargetIdxForKey(), operator==(), and setTargetIdxForKey().

bool QueryMemoryDescriptor::interleaved_bins_on_gpu_
private
bool QueryMemoryDescriptor::is_table_function_
private

Definition at line 355 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory().

bool QueryMemoryDescriptor::keyless_hash_
private
int64_t QueryMemoryDescriptor::max_val_
private

Definition at line 346 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMaxVal(), operator==(), and toString().

int64_t QueryMemoryDescriptor::min_val_
private

Definition at line 344 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMinVal(), operator==(), and toString().

bool QueryMemoryDescriptor::must_use_baseline_sort_
private

Definition at line 354 of file QueryMemoryDescriptor.h.

Referenced by mustUseBaselineSort(), and toString().

bool QueryMemoryDescriptor::render_output_
private

Definition at line 353 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), lazyInitGroups(), and toString().

GroupByMemSharing QueryMemoryDescriptor::sharing_
private
bool QueryMemoryDescriptor::sort_on_gpu_
private

Definition at line 351 of file QueryMemoryDescriptor.h.

Referenced by operator==(), QueryMemoryDescriptor(), sortOnGpu(), and toString().

std::vector<ssize_t> QueryMemoryDescriptor::target_groupby_indices_
private

The documentation for this class was generated from the following files: