OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
QueryMemoryDescriptor Class Reference

#include <QueryMemoryDescriptor.h>

+ Collaboration diagram for QueryMemoryDescriptor:

Public Member Functions

 QueryMemoryDescriptor ()
 
 QueryMemoryDescriptor (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector< int8_t > &group_col_widths, const int8_t group_col_compact_width, const std::vector< int64_t > &target_groupby_indices, const size_t entry_count, const CountDistinctDescriptors count_distinct_descriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort, const bool use_streaming_top_n)
 
 QueryMemoryDescriptor (const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type, const bool is_table_function)
 
 QueryMemoryDescriptor (const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector< int8_t > &group_col_widths)
 
 QueryMemoryDescriptor (const TResultSetBufferDescriptor &thrift_query_memory_descriptor)
 
bool operator== (const QueryMemoryDescriptor &other) const
 
std::unique_ptr
< QueryExecutionContext >
getQueryExecutionContext (const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *) const
 
bool countDistinctDescriptorsLogicallyEmpty () const
 
const Executor * getExecutor () const
 
QueryDescriptionType getQueryDescriptionType () const
 
void setQueryDescriptionType (const QueryDescriptionType val)
 
bool isSingleColumnGroupByWithPerfectHash () const
 
bool hasKeylessHash () const
 
void setHasKeylessHash (const bool val)
 
bool hasInterleavedBinsOnGpu () const
 
void setHasInterleavedBinsOnGpu (const bool val)
 
int32_t getTargetIdxForKey () const
 
void setTargetIdxForKey (const int32_t val)
 
int8_t groupColWidth (const size_t key_idx) const
 
size_t getPrependedGroupColOffInBytes (const size_t group_idx) const
 
size_t getPrependedGroupBufferSizeInBytes () const
 
const auto groupColWidthsBegin () const
 
const auto groupColWidthsEnd () const
 
void clearGroupColWidths ()
 
bool isGroupBy () const
 
void setGroupColCompactWidth (const int8_t val)
 
size_t getColCount () const
 
size_t getSlotCount () const
 
const int8_t getPaddedSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getLogicalSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getSlotIndexForSingleSlotCol (const size_t col_idx) const
 
size_t getPaddedColWidthForRange (const size_t offset, const size_t range) const
 
void useConsistentSlotWidthSize (const int8_t slot_width_size)
 
size_t getRowWidth () const
 
int8_t updateActualMinByteWidth (const int8_t actual_min_byte_width) const
 
void addColSlotInfo (const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)
 
void clearSlotInfo ()
 
void alignPaddedSlots ()
 
int64_t getTargetGroupbyIndex (const size_t target_idx) const
 
void setAllTargetGroupbyIndices (std::vector< int64_t > group_by_indices)
 
size_t targetGroupbyIndicesSize () const
 
size_t targetGroupbyNegativeIndicesSize () const
 
void clearTargetGroupbyIndices ()
 
size_t getEntryCount () const
 
void setEntryCount (const size_t val)
 
int64_t getMinVal () const
 
int64_t getMaxVal () const
 
int64_t getBucket () const
 
bool hasNulls () const
 
const CountDistinctDescriptor & getCountDistinctDescriptor (const size_t idx) const
 
size_t getCountDistinctDescriptorsSize () const
 
bool sortOnGpu () const
 
bool canOutputColumnar () const
 
bool didOutputColumnar () const
 
void setOutputColumnar (const bool val)
 
bool useStreamingTopN () const
 
bool isLogicalSizedColumnsAllowed () const
 
bool mustUseBaselineSort () const
 
bool forceFourByteFloat () const
 
void setForceFourByteFloat (const bool val)
 
size_t getGroupbyColCount () const
 
size_t getKeyCount () const
 
size_t getBufferColSlotCount () const
 
size_t getBufferSizeBytes (const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type, const size_t override_entry_count) const
 
const ColSlotContext & getColSlotContext () const
 
bool usesGetGroupValueFast () const
 
bool blocksShareMemory () const
 
bool threadsShareMemory () const
 
bool lazyInitGroups (const ExecutorDeviceType) const
 
bool interleavedBins (const ExecutorDeviceType) const
 
size_t getColOffInBytes (const size_t col_idx) const
 
size_t getColOffInBytesInNextBin (const size_t col_idx) const
 
size_t getNextColOffInBytes (const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
 
size_t getNextColOffInBytesRowOnly (const int8_t *col_ptr, const size_t col_idx) const
 
size_t getColOnlyOffInBytes (const size_t col_idx) const
 
size_t getRowSize () const
 
size_t getColsSize () const
 
size_t getWarpCount () const
 
size_t getCompactByteWidth () const
 
size_t getEffectiveKeyWidth () const
 
bool isWarpSyncRequired (const ExecutorDeviceType) const
 
std::string queryDescTypeToString () const
 
std::string toString () const
 
std::string reductionKey () const
 

Static Public Member Functions

static TResultSetBufferDescriptor toThrift (const QueryMemoryDescriptor &)
 
static std::unique_ptr
< QueryMemoryDescriptor >
init (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint)
 
static bool many_entries (const int64_t max_val, const int64_t min_val, const int64_t bucket)
 
static bool countDescriptorsLogicallyEmpty (const CountDistinctDescriptors &count_distinct_descriptors)
 
static int8_t pick_target_compact_width (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
 

Protected Member Functions

void resetGroupColWidths (const std::vector< int8_t > &new_group_col_widths)
 

Private Member Functions

size_t getTotalBytesOfColumnarBuffers () const
 
size_t getTotalBytesOfColumnarBuffers (const size_t num_entries_per_column) const
 
size_t getTotalBytesOfColumnarProjections (const size_t projection_count) const
 

Private Attributes

const Executor * executor_
 
bool allow_multifrag_
 
QueryDescriptionType query_desc_type_
 
bool keyless_hash_
 
bool interleaved_bins_on_gpu_
 
int32_t idx_target_as_key_
 
std::vector< int8_t > group_col_widths_
 
int8_t group_col_compact_width_
 
std::vector< int64_t > target_groupby_indices_
 
size_t entry_count_
 
int64_t min_val_
 
int64_t max_val_
 
int64_t bucket_
 
bool has_nulls_
 
CountDistinctDescriptors count_distinct_descriptors_
 
bool sort_on_gpu_
 
bool output_columnar_
 
bool render_output_
 
bool must_use_baseline_sort_
 
bool is_table_function_
 
bool use_streaming_top_n_
 
bool force_4byte_float_
 
ColSlotContext col_slot_context_
 

Friends

class ResultSet
 
class QueryExecutionContext
 

Detailed Description

Definition at line 68 of file QueryMemoryDescriptor.h.

Constructor & Destructor Documentation

QueryMemoryDescriptor::QueryMemoryDescriptor ( )

Definition at line 483 of file QueryMemoryDescriptor.cpp.

References Projection.

484  : executor_(nullptr)
485  , allow_multifrag_(false)
487  , keyless_hash_(false)
488  , interleaved_bins_on_gpu_(false)
489  , idx_target_as_key_(0)
491  , entry_count_(0)
492  , min_val_(0)
493  , max_val_(0)
494  , bucket_(0)
495  , has_nulls_(false)
496  , sort_on_gpu_(false)
497  , output_columnar_(false)
498  , render_output_(false)
499  , must_use_baseline_sort_(false)
500  , is_table_function_(false)
501  , use_streaming_top_n_(false)
502  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_
QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const bool  allow_multifrag,
const bool  keyless_hash,
const bool  interleaved_bins_on_gpu,
const int32_t  idx_target_as_key,
const ColRangeInfo &  col_range_info,
const ColSlotContext &  col_slot_context,
const std::vector< int8_t > &  group_col_widths,
const int8_t  group_col_compact_width,
const std::vector< int64_t > &  target_groupby_indices,
const size_t  entry_count,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  sort_on_gpu_hint,
const bool  output_columnar,
const bool  render_output,
const bool  must_use_baseline_sort,
const bool  use_streaming_top_n 
)

Definition at line 386 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::anyOf(), canOutputColumnar(), CHECK, col_slot_context_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, streaming_top_n::get_heap_size(), getEntryCount(), getRowSize(), GroupByBaselineHash, GroupByPerfectHash, isLogicalSizedColumnsAllowed(), kAPPROX_MEDIAN, keyless_hash_, NonGroupedAggregate, output_columnar_, Projection, query_desc_type_, ColSlotContext::setAllSlotsPaddedSizeToLogicalSize(), ColSlotContext::setAllUnsetSlotsPaddedSize(), sort_on_gpu_, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::use_bump_allocator, use_streaming_top_n_, and ColSlotContext::validate().

406  : executor_(executor)
407  , allow_multifrag_(allow_multifrag)
408  , query_desc_type_(col_range_info.hash_type_)
409  , keyless_hash_(keyless_hash)
410  , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
411  , idx_target_as_key_(idx_target_as_key)
412  , group_col_widths_(group_col_widths)
413  , group_col_compact_width_(group_col_compact_width)
414  , target_groupby_indices_(target_groupby_indices)
415  , entry_count_(entry_count)
416  , min_val_(col_range_info.min)
417  , max_val_(col_range_info.max)
418  , bucket_(col_range_info.bucket)
419  , has_nulls_(col_range_info.has_nulls)
420  , count_distinct_descriptors_(count_distinct_descriptors)
421  , output_columnar_(false)
422  , render_output_(render_output)
423  , must_use_baseline_sort_(must_use_baseline_sort)
424  , is_table_function_(false)
426  , force_4byte_float_(false)
427  , col_slot_context_(col_slot_context) {
430 
431  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
432 
433  if (sort_on_gpu_) {
434  CHECK(!ra_exe_unit.use_bump_allocator);
435  output_columnar_ = true;
436  } else {
437  switch (query_desc_type_) {
439  output_columnar_ = output_columnar_hint;
440  break;
442  output_columnar_ = output_columnar_hint &&
445  !anyOf(ra_exe_unit.target_exprs, kAPPROX_MEDIAN);
446  break;
448  output_columnar_ = output_columnar_hint;
449  break;
451  output_columnar_ = output_columnar_hint &&
454  !anyOf(ra_exe_unit.target_exprs, kAPPROX_MEDIAN);
455  break;
456  default:
457  output_columnar_ = false;
458  break;
459  }
460  }
461 
463  // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
464  // sizes
465  CHECK(!ra_exe_unit.use_bump_allocator);
468  }
469 
470 #ifdef HAVE_CUDA
471  // Check Streaming Top N heap usage, bail if > max slab size, CUDA ONLY
472  if (use_streaming_top_n_ && executor->catalog_->getDataMgr().gpusPresent()) {
473  const auto thread_count = executor->blockSize() * executor->gridSize();
474  const auto total_buff_size =
476  if (total_buff_size > executor_->maxGpuSlabSize()) {
477  throw StreamingTopNOOM(total_buff_size);
478  }
479  }
480 #endif
481 }
std::vector< Analyzer::Expr * > target_exprs
bool isLogicalSizedColumnsAllowed() const
QueryDescriptionType hash_type_
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
std::vector< int64_t > target_groupby_indices_
CountDistinctDescriptors count_distinct_descriptors_
void validate() const
bool anyOf(std::vector< Analyzer::Expr * > const &target_exprs, SQLAgg const agg_kind)
QueryDescriptionType query_desc_type_
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
void setAllSlotsPaddedSizeToLogicalSize()
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int8_t > group_col_widths_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
void setAllUnsetSlotsPaddedSize(const int8_t padded_size)

+ Here is the call graph for this function:

QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const size_t  entry_count,
const QueryDescriptionType  query_desc_type,
const bool  is_table_function 
)

Definition at line 504 of file QueryMemoryDescriptor.cpp.

508  : executor_(executor)
509  , allow_multifrag_(false)
510  , query_desc_type_(query_desc_type)
511  , keyless_hash_(false)
512  , interleaved_bins_on_gpu_(false)
513  , idx_target_as_key_(0)
515  , entry_count_(entry_count)
516  , min_val_(0)
517  , max_val_(0)
518  , bucket_(0)
519  , has_nulls_(false)
520  , sort_on_gpu_(false)
521  , output_columnar_(false)
522  , render_output_(false)
523  , must_use_baseline_sort_(false)
524  , is_table_function_(is_table_function)
525  , use_streaming_top_n_(false)
526  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_
QueryMemoryDescriptor::QueryMemoryDescriptor ( const QueryDescriptionType  query_desc_type,
const int64_t  min_val,
const int64_t  max_val,
const bool  has_nulls,
const std::vector< int8_t > &  group_col_widths 
)

Definition at line 528 of file QueryMemoryDescriptor.cpp.

533  : executor_(nullptr)
534  , allow_multifrag_(false)
535  , query_desc_type_(query_desc_type)
536  , keyless_hash_(false)
537  , interleaved_bins_on_gpu_(false)
538  , idx_target_as_key_(0)
539  , group_col_widths_(group_col_widths)
541  , entry_count_(0)
542  , min_val_(min_val)
543  , max_val_(max_val)
544  , bucket_(0)
545  , has_nulls_(false)
546  , sort_on_gpu_(false)
547  , output_columnar_(false)
548  , render_output_(false)
549  , must_use_baseline_sort_(false)
550  , is_table_function_(false)
551  , use_streaming_top_n_(false)
552  , force_4byte_float_(false) {}
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
QueryMemoryDescriptor::QueryMemoryDescriptor ( const TResultSetBufferDescriptor &  thrift_query_memory_descriptor)

Member Function Documentation

void QueryMemoryDescriptor::addColSlotInfo ( const std::vector< std::tuple< int8_t, int8_t >> &  slots_for_col)

Definition at line 1101 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumn(), and col_slot_context_.

Referenced by ResultSetLogicalValuesBuilder::create(), TableFunctionExecutionContext::launchCpuCode(), and TableFunctionExecutionContext::launchGpuCode().

1102  {
1103  col_slot_context_.addColumn(slots_for_col);
1104 }
void addColumn(const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::alignPaddedSlots ( )

Definition at line 1110 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::alignPaddedSlots(), col_slot_context_, and sortOnGpu().

1110  {
1112 }
void alignPaddedSlots(const bool sort_on_gpu)

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::blocksShareMemory ( ) const

Definition at line 1024 of file QueryMemoryDescriptor.cpp.

References bucket_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, g_cluster, getGroupbyColCount(), GroupByBaselineHash, GroupByPerfectHash, is_table_function_, many_entries(), max_val_, min_val_, Projection, query_desc_type_, and render_output_.

Referenced by canOutputColumnar(), ResultSetReductionJIT::codegen(), QueryMemoryInitializer::computeNumberOfBuffers(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), and toString().

1024  {
1025  if (g_cluster || is_table_function_) {
1026  return true;
1027  }
1029  return true;
1030  }
1031  if (executor_->isCPUOnly() || render_output_ ||
1035  getGroupbyColCount() > 1)) {
1036  return true;
1037  }
1040 }
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
size_t getGroupbyColCount() const
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
bool g_cluster
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::canOutputColumnar ( ) const

Definition at line 1114 of file QueryMemoryDescriptor.cpp.

References blocksShareMemory(), count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, interleavedBins(), threadsShareMemory(), and usesGetGroupValueFast().

Referenced by QueryMemoryDescriptor().

1114  {
1118 }
CountDistinctDescriptors count_distinct_descriptors_
bool interleavedBins(const ExecutorDeviceType) const
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::clearGroupColWidths ( )
inline

Definition at line 194 of file QueryMemoryDescriptor.h.

References group_col_widths_.

194 { group_col_widths_.clear(); }
std::vector< int8_t > group_col_widths_
void QueryMemoryDescriptor::clearSlotInfo ( )

Definition at line 1106 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::clear(), and col_slot_context_.

1106  {
1108 }

+ Here is the call graph for this function:

void QueryMemoryDescriptor::clearTargetGroupbyIndices ( )
inline

Definition at line 243 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

243 { target_groupby_indices_.clear(); }
std::vector< int64_t > target_groupby_indices_
static bool QueryMemoryDescriptor::countDescriptorsLogicallyEmpty ( const CountDistinctDescriptors &  count_distinct_descriptors)
inline static

Definition at line 149 of file QueryMemoryDescriptor.h.

References Invalid.

Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), lazyInitGroups(), and QueryMemoryDescriptor().

150  {
151  return std::all_of(count_distinct_descriptors.begin(),
152  count_distinct_descriptors.end(),
153  [](const CountDistinctDescriptor& desc) {
154  return desc.impl_type_ == CountDistinctImplType::Invalid;
155  });
156  }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty ( ) const
inline

Definition at line 158 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_, and countDescriptorsLogicallyEmpty().

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem().

158  {
160  }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::didOutputColumnar ( ) const
inline

Definition at line 265 of file QueryMemoryDescriptor.h.

References output_columnar_.

Referenced by ResultSetStorage::binSearchRowCount(), TargetExprCodegen::codegen(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), GroupByAndAggregate::codegenWindowRowPointer(), copy_projection_buffer_from_gpu_columnar(), ResultSetStorage::copyKeyColWise(), ResultSet::createComparator(), ResultSet::didOutputColumnar(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), ResultSet::fixupQueryMemoryDescriptor(), get_cols_ptr(), ResultSet::getTargetValueFromBufferColwise(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), ResultSetStorage::initializeBaselineValueSlots(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSet::makeGeoTargetValue(), ResultSetStorage::moveOneEntryToBuffer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::forceFourByteFloat ( ) const
inline

Definition at line 276 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

Referenced by ResultSet::makeTargetValue().

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getBucket ( ) const
inline

Definition at line 250 of file QueryMemoryDescriptor.h.

References bucket_.

Referenced by GroupByAndAggregate::codegenGroupBy(), and GroupByAndAggregate::codegenSingleColumnPerfectHash().

250 { return bucket_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferColSlotCount ( ) const

Definition at line 1004 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), i, and target_groupby_indices_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::advance_col_buff_to_slot(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), and ResultSetStorage::fillOneEntryRowWise().

1004  {
1005  size_t total_slot_count = col_slot_context_.getSlotCount();
1006 
1007  if (target_groupby_indices_.empty()) {
1008  return total_slot_count;
1009  }
1010  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
1012  [](const int64_t i) { return i >= 0; });
1013 }
std::vector< int64_t > target_groupby_indices_
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  thread_count,
const ExecutorDeviceType  device_type 
) const

Definition at line 935 of file QueryMemoryDescriptor.cpp.

References entry_count_, streaming_top_n::get_heap_size(), getRowSize(), SortInfo::limit, SortInfo::offset, RelAlgExecutionUnit::sort_info, and use_streaming_top_n_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), getBufferSizeBytes(), and QueryMemoryInitializer::QueryMemoryInitializer().

938  {
939  if (use_streaming_top_n_) {
940  const size_t n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
941  return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
942  }
943  return getBufferSizeBytes(device_type, entry_count_);
944 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
const size_t limit
const SortInfo sort_info
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
const size_t offset

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type) const

Definition at line 981 of file QueryMemoryDescriptor.cpp.

References entry_count_, and getBufferSizeBytes().

982  {
983  return getBufferSizeBytes(device_type, entry_count_);
984 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type,
const size_t  entry_count 
) const

Returns total amount of output buffer memory for each device (CPU/GPU)

Columnar: for a projection, it returns the index buffer + the columnar buffer (all non-lazy columns); for a group by, it returns the amount required for each group column (assumes 64-bit per group) + the columnar buffer (all involved agg columns).

Row-wise: returns required memory per row multiplied by number of entries

Definition at line 957 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_GE, executor_, getColsSize(), getRowSize(), getTotalBytesOfColumnarBuffers(), group_col_widths_, interleavedBins(), keyless_hash_, output_columnar_, Projection, and query_desc_type_.

958  {
960  CHECK_GE(group_col_widths_.size(), size_t(1));
961  auto row_bytes = align_to_int64(getColsSize());
962 
963  return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
964  row_bytes;
965  }
966 
967  constexpr size_t row_index_width = sizeof(int64_t);
968  size_t total_bytes{0};
969  if (output_columnar_) {
971  ? row_index_width * entry_count
972  : sizeof(int64_t) * group_col_widths_.size() * entry_count) +
974  } else {
975  total_bytes = getRowSize() * entry_count;
976  }
977 
978  return total_bytes;
979 }
#define CHECK_GE(x, y)
Definition: Logger.h:210
size_t getTotalBytesOfColumnarBuffers() const
QueryDescriptionType query_desc_type_
bool interleavedBins(const ExecutorDeviceType) const
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColCount ( ) const

Definition at line 1063 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColCount().

1063  {
1064  return col_slot_context_.getColCount();
1065 }
size_t getColCount() const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytes ( const size_t  col_idx) const

Definition at line 814 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, entry_count_, getColOnlyOffInBytes(), getEffectiveKeyWidth(), getPaddedSlotWidthBytes(), getPrependedGroupBufferSizeInBytes(), getWarpCount(), group_col_widths_, GroupByPerfectHash, keyless_hash_, output_columnar_, and query_desc_type_.

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenOutputSlot(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), get_cols_ptr(), QueryExecutionContext::groupBufferToDeinterleavedResults(), QueryMemoryInitializer::initRowGroups(), inplace_sort_gpu(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

814  {
815  const auto warp_count = getWarpCount();
816  if (output_columnar_) {
817  CHECK_EQ(size_t(1), warp_count);
818  size_t offset{0};
819  if (!keyless_hash_) {
821  }
822  for (size_t index = 0; index < col_idx; ++index) {
824  }
825  return offset;
826  }
827 
828  size_t offset{0};
829  if (keyless_hash_) {
830  // ignore, there's no group column in the output buffer
832  } else {
833  offset += group_col_widths_.size() * getEffectiveKeyWidth();
834  offset = align_to_int64(offset);
835  }
836  offset += getColOnlyOffInBytes(col_idx);
837  return offset;
838 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
size_t getEffectiveKeyWidth() const
size_t getColOnlyOffInBytes(const size_t col_idx) const
size_t getPrependedGroupBufferSizeInBytes() const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType query_desc_type_
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytesInNextBin ( const size_t  col_idx) const

Definition at line 873 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, getPaddedSlotWidthBytes(), getRowSize(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryExecutionContext::groupBufferToDeinterleavedResults().

873  {
874  auto warp_count = getWarpCount();
875  if (output_columnar_) {
876  CHECK_EQ(size_t(1), group_col_widths_.size());
877  CHECK_EQ(size_t(1), warp_count);
878  return getPaddedSlotWidthBytes(col_idx);
879  }
880 
881  return warp_count * getRowSize();
882 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
std::vector< int8_t > group_col_widths_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOnlyOffInBytes ( const size_t  col_idx) const

Definition at line 801 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColOnlyOffInBytes().

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), getColOffInBytes(), and ResultSetStorage::reduceSingleRow().

801  {
802  return col_slot_context_.getColOnlyOffInBytes(col_idx);
803 }
size_t getColOnlyOffInBytes(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColSlotContext& QueryMemoryDescriptor::getColSlotContext ( ) const
inline

Definition at line 291 of file QueryMemoryDescriptor.h.

References col_slot_context_.

Referenced by ResultSetStorage::reduceEntriesNoCollisionsColWise(), and ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions().

291 { return col_slot_context_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColsSize ( ) const

Definition at line 740 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsAlignedPaddedSize().

Referenced by getBufferSizeBytes(), getRowSize(), and QueryExecutionContext::launchCpuCode().

740  {
742 }
size_t getAllSlotsAlignedPaddedSize() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getCompactByteWidth ( ) const

Definition at line 762 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getCompactByteWidth().

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), and init_agg_val_vec().

762  {
764 }
size_t getCompactByteWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const CountDistinctDescriptor& QueryMemoryDescriptor::getCountDistinctDescriptor ( const size_t  idx) const
inline
size_t QueryMemoryDescriptor::getCountDistinctDescriptorsSize ( ) const
inline

Definition at line 258 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_.

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::reduceOneApproxMedianSlot(), ResultSetStorage::reduceOneApproxMedianSlot(), ResultSetReductionJIT::reduceOneCountDistinctSlot(), and ResultSetStorage::reduceOneCountDistinctSlot().

258  {
259  return count_distinct_descriptors_.size();
260  }
CountDistinctDescriptors count_distinct_descriptors_

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getEntryCount ( ) const
inline

Definition at line 245 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by advance_to_next_columnar_target_buff(), QueryMemoryInitializer::allocateCountDistinctGpuMem(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetStorage::binSearchRowCount(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GpuSharedMemCodeBuilder::codegenReduction(), GroupByAndAggregate::codegenWindowRowPointer(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), Executor::createKernels(), ResultSet::entryCount(), Executor::executePlanWithGroupBy(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), anonymous_namespace{ResultSetReduction.cpp}::get_matching_group_value_reduction(), ResultSetStorage::getEntryCount(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSet::getTargetValueFromBufferColwise(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initGroupByBuffer(), ResultSetStorage::initializeBaselineValueSlots(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), Executor::inlineHoistedLiterals(), inplace_sort_gpu(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), ResultSetStorage::moveOneEntryToBuffer(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), and ResultSetStorage::rewriteAggregateBufferOffsets().

245 { return entry_count_; }

+ Here is the caller graph for this function:

const Executor* QueryMemoryDescriptor::getExecutor ( ) const
inline

Definition at line 167 of file QueryMemoryDescriptor.h.

References executor_.

Referenced by anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), ResultSetReductionJIT::codegen(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::getVarlenOrderEntry(), ResultSet::makeGeoTargetValue(), and ResultSet::makeVarlenTargetValue().

167 { return executor_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getKeyCount ( ) const
inline

Definition at line 281 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), and keyless_hash_.

Referenced by anonymous_namespace{Execute.cpp}::permute_storage_columnar().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1075 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::logical_size.

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateTDigests(), TargetExprCodegen::codegenAggregate(), and ResultSet::getTargetValueFromBufferRowwise().

1076  {
1077  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
1078 }
int8_t logical_size
const SlotSize & getSlotInfo(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getMaxVal ( ) const
inline

Definition at line 249 of file QueryMemoryDescriptor.h.

References max_val_.

Referenced by GroupByAndAggregate::codegenGroupBy().

249 { return max_val_; }

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getMinVal ( ) const
inline

Definition at line 248 of file QueryMemoryDescriptor.h.

References min_val_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

248 { return min_val_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytes ( const int8_t *  col_ptr,
const size_t  bin,
const size_t  col_idx 
) const

Definition at line 884 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, entry_count_, getPaddedSlotWidthBytes(), getSlotCount(), getWarpCount(), group_col_widths_, and output_columnar_.

886  {
888  size_t offset{0};
889  auto warp_count = getWarpCount();
890  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
891  const auto total_slot_count = getSlotCount();
892  if (col_idx + 1 == total_slot_count) {
893  if (output_columnar_) {
894  return (entry_count_ - bin) * chosen_bytes;
895  } else {
896  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
897  }
898  }
899 
900  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
901  if (output_columnar_) {
902  CHECK_EQ(size_t(1), group_col_widths_.size());
903  CHECK_EQ(size_t(1), warp_count);
904 
905  offset = align_to_int64(entry_count_ * chosen_bytes);
906 
907  offset += bin * (next_chosen_bytes - chosen_bytes);
908  return offset;
909  }
910 
911  if (next_chosen_bytes == sizeof(int64_t)) {
912  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
913  } else {
914  return chosen_bytes;
915  }
916 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytesRowOnly ( const int8_t *  col_ptr,
const size_t  col_idx 
) const

Definition at line 918 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), getPaddedSlotWidthBytes(), and getSlotCount().

Referenced by QueryMemoryInitializer::initColumnsPerRow().

919  {
920  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
921  const auto total_slot_count = getSlotCount();
922  if (col_idx + 1 == total_slot_count) {
923  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
924  }
925 
926  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
927 
928  if (next_chosen_bytes == sizeof(int64_t)) {
929  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
930  } else {
931  return chosen_bytes;
932  }
933 }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPaddedColWidthForRange ( const size_t  offset,
const size_t  range 
) const
inline

Definition at line 208 of file QueryMemoryDescriptor.h.

References getPaddedSlotWidthBytes(), and i.

Referenced by result_set::get_byteoff_of_slot(), and ResultSet::makeGeoTargetValue().

208  {
209  size_t ret = 0;
210  for (size_t i = offset; i < offset + range; i++) {
211  ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
212  }
213  return ret;
214  }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1071 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::padded_size.

Referenced by advance_target_ptr_row_wise(), advance_to_next_columnar_target_buff(), TargetExprCodegen::codegen(), anonymous_namespace{GpuSharedMemoryUtils.cpp}::codegen_smem_dest_slot_ptr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenOutputSlot(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), Executor::executePlanWithoutGroupBy(), result_set::get_width_for_slot(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getNextColOffInBytesRowOnly(), getPaddedColWidthForRange(), ResultSet::getPaddedSlotWidthBytes(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnsPerRow(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSet::makeGeoTargetValue(), TargetExprCodegenBuilder::operator()(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneAggregateSlot(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

1071  {
1072  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
1073 }
const SlotSize & getSlotInfo(const size_t slot_idx) const
int8_t padded_size

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes ( ) const

Definition at line 862 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by getColOffInBytes().

862  {
864  size_t buffer_size{0};
865  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
866  buffer_size += align_to_int64(
867  std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
868  getEntryCount());
869  }
870  return buffer_size;
871 }
int8_t groupColWidth(const size_t key_idx) const
size_t getGroupbyColCount() const
#define CHECK(condition)
Definition: Logger.h:197
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes ( const size_t  group_idx) const

Definition at line 844 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by ResultSetStorage::copyKeyColWise(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

845  {
847  CHECK(group_idx < getGroupbyColCount());
848  size_t offset{0};
849  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
850  // TODO(Saman): relax that int64_bit part immediately
851  offset += align_to_int64(
852  std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
853  getEntryCount());
854  }
855  return offset;
856 }
int8_t groupColWidth(const size_t key_idx) const
size_t getGroupbyColCount() const
#define CHECK(condition)
Definition: Logger.h:197
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

QueryDescriptionType QueryMemoryDescriptor::getQueryDescriptionType ( ) const
inline

Definition at line 169 of file QueryMemoryDescriptor.h.

References query_desc_type_.

Referenced by ResultSetStorage::binSearchRowCount(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GpuReductionHelperJIT::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), Executor::collectAllDeviceResults(), copy_projection_buffer_from_gpu_columnar(), Executor::createKernels(), ResultSet::getQueryDescriptionType(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), isSingleColumnGroupByWithPerfectHash(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), TargetExprCodegenBuilder::operator()(), ResultSetStorage::reduce(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx(), ExecutionKernel::run(), ExecutionKernel::runImpl(), target_exprs_to_infos(), and ResultSet::updateStorageEntryCount().

169 { return query_desc_type_; }
QueryDescriptionType query_desc_type_

+ Here is the caller graph for this function:

std::unique_ptr< QueryExecutionContext > QueryMemoryDescriptor::getQueryExecutionContext ( const RelAlgExecutionUnit &  ra_exe_unit,
const Executor *  executor,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const int  device_id,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const bool  output_columnar,
const bool  sort_on_gpu,
const size_t  thread_idx,
RenderInfo *  render_info
) const

Definition at line 618 of file QueryMemoryDescriptor.cpp.

References DEBUG_TIMER, and QueryExecutionContext.

Referenced by ExecutionKernel::runImpl().

631  {
632  auto timer = DEBUG_TIMER(__func__);
633  if (frag_offsets.empty()) {
634  return nullptr;
635  }
636  return std::unique_ptr<QueryExecutionContext>(
637  new QueryExecutionContext(ra_exe_unit,
638  *this,
639  executor,
640  device_type,
641  dispatch_mode,
642  device_id,
643  num_rows,
644  col_buffers,
645  frag_offsets,
646  row_set_mem_owner,
647  output_columnar,
648  sort_on_gpu,
649  thread_idx,
650  render_info));
651 }
#define DEBUG_TIMER(name)
Definition: Logger.h:313
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowSize ( ) const

Definition at line 744 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getColsSize(), getEffectiveKeyWidth(), group_col_widths_, GroupByPerfectHash, keyless_hash_, output_columnar_, and query_desc_type_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetLogicalValuesBuilder::build(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), getBufferSizeBytes(), getColOffInBytesInNextBin(), QueryMemoryInitializer::initRowGroups(), Executor::inlineHoistedLiterals(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), and ResultSetStorage::reduceSingleRow().

744  {
746  size_t total_bytes{0};
747  if (keyless_hash_) {
748  // ignore, there's no group column in the output buffer
750  } else {
751  total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
752  total_bytes = align_to_int64(total_bytes);
753  }
754  total_bytes += getColsSize();
755  return align_to_int64(total_bytes);
756 }
size_t getEffectiveKeyWidth() const
QueryDescriptionType query_desc_type_
#define CHECK(condition)
Definition: Logger.h:197
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowWidth ( ) const

Definition at line 1091 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsPaddedSize().

Referenced by get_row_bytes().

1091  {
1092  // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
1094 }
size_t getAllSlotsPaddedSize() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getSlotCount ( ) const
const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol ( const size_t  col_idx) const

Definition at line 1080 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, col_slot_context_, and ColSlotContext::getSlotsForCol().

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), and QueryMemoryInitializer::allocateTDigests().

1081  {
1082  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
1083  CHECK_EQ(col_slots.size(), size_t(1));
1084  return col_slots.front();
1085 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const std::vector< size_t > & getSlotsForCol(const size_t col_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getTargetGroupbyIndex ( const size_t  target_idx) const
inline

Definition at line 227 of file QueryMemoryDescriptor.h.

References CHECK_LT, and target_groupby_indices_.

Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().

227  {
228  CHECK_LT(target_idx, target_groupby_indices_.size());
229  return target_groupby_indices_[target_idx];
230  }
std::vector< int64_t > target_groupby_indices_
#define CHECK_LT(x, y)
Definition: Logger.h:207

+ Here is the caller graph for this function:

int32_t QueryMemoryDescriptor::getTargetIdxForKey ( ) const
inline

Definition at line 182 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

Referenced by ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSetStorage::reduceSingleRow(), and reductionKey().

182 { return idx_target_as_key_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( ) const
private

Returns the maximum total number of bytes (including any required padding) needed to store the results of all non-lazy columns in the columnar case.

Definition at line 771 of file QueryMemoryDescriptor.cpp.

References CHECK, col_slot_context_, entry_count_, ColSlotContext::getTotalBytesOfColumnarBuffers(), and output_columnar_.

Referenced by getBufferSizeBytes(), and getTotalBytesOfColumnarProjections().

771  {
774 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( const size_t  num_entries_per_column) const
private

This is a helper function that returns the total number of bytes (including any required padding) needed to store the results of all non-lazy columns in the columnar case.

Definition at line 780 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getTotalBytesOfColumnarBuffers().

781  {
782  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
783 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections ( const size_t  projection_count) const
private

Returns the effective total number of bytes from columnar projections, which includes: 1) the total number of bytes used to store all non-lazy columns, and 2) the total number of bytes used to store row indices (for lazy fetches, etc.).

NOTE: this function does not return the size of the buffers dedicated to the results; it returns the memory required to copy all valid results into a new, compact buffer (with no holes in it).

Definition at line 794 of file QueryMemoryDescriptor.cpp.

References getTotalBytesOfColumnarBuffers().

795  {
796  constexpr size_t row_index_width = sizeof(int64_t);
797  return getTotalBytesOfColumnarBuffers(projection_count) +
798  row_index_width * projection_count;
799 }
size_t getTotalBytesOfColumnarBuffers() const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getWarpCount ( ) const

Definition at line 758 of file QueryMemoryDescriptor.cpp.

References executor_, and interleaved_bins_on_gpu_.

Referenced by getColOffInBytes(), getColOffInBytesInNextBin(), and getNextColOffInBytes().

758  {
759  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
760 }

+ Here is the caller graph for this function:

int8_t QueryMemoryDescriptor::groupColWidth ( const size_t  key_idx) const
inline

Definition at line 185 of file QueryMemoryDescriptor.h.

References CHECK_LT, and group_col_widths_.

Referenced by ResultSetStorage::copyKeyColWise(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

185  {
186  CHECK_LT(key_idx, group_col_widths_.size());
187  return group_col_widths_[key_idx];
188  }
#define CHECK_LT(x, y)
Definition: Logger.h:207
std::vector< int8_t > group_col_widths_

+ Here is the caller graph for this function:

const auto QueryMemoryDescriptor::groupColWidthsBegin ( ) const
inline

Definition at line 192 of file QueryMemoryDescriptor.h.

References group_col_widths_.

192 { return group_col_widths_.begin(); }
std::vector< int8_t > group_col_widths_
const auto QueryMemoryDescriptor::groupColWidthsEnd ( ) const
inline

Definition at line 193 of file QueryMemoryDescriptor.h.

References group_col_widths_.

193 { return group_col_widths_.end(); }
std::vector< int8_t > group_col_widths_
bool QueryMemoryDescriptor::hasInterleavedBinsOnGpu ( ) const
inline

Definition at line 179 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

bool QueryMemoryDescriptor::hasNulls ( ) const
inline

Definition at line 252 of file QueryMemoryDescriptor.h.

References has_nulls_.

Referenced by GroupByAndAggregate::codegenGroupBy().

252 { return has_nulls_; }

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptor::init ( const Executor *  executor,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const ColRangeInfo &  col_range_info,
const KeylessInfo &  keyless_info,
const bool  allow_multifrag,
const ExecutorDeviceType  device_type,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
const size_t  shard_count,
const size_t  max_groups_buffer_entry_count,
RenderInfo *  render_info,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint,
const bool  streaming_top_n_hint 
)
static

Definition at line 182 of file QueryMemoryDescriptor.cpp.

References get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

198  {
199  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs);
200  const bool is_group_by{!group_col_widths.empty()};
201 
202  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
203 
204  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
205  ra_exe_unit, query_infos, crt_min_byte_width);
206 
207  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
208  col_slot_context.validate();
209 
210  if (!is_group_by) {
211  CHECK(!must_use_baseline_sort);
212 
213  return std::make_unique<QueryMemoryDescriptor>(
214  executor,
215  ra_exe_unit,
216  query_infos,
217  allow_multifrag,
218  false,
219  false,
220  -1,
221  ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
223  0,
224  0,
225  0,
226  false},
227  col_slot_context,
228  std::vector<int8_t>{},
229  /*group_col_compact_width=*/0,
230  std::vector<int64_t>{},
231  /*entry_count=*/1,
232  count_distinct_descriptors,
233  false,
234  output_columnar_hint,
235  render_info && render_info->isPotentialInSituRender(),
236  must_use_baseline_sort,
237  /*use_streaming_top_n=*/false);
238  }
239 
240  size_t entry_count = 1;
241  auto actual_col_range_info = col_range_info;
242  bool interleaved_bins_on_gpu = false;
243  bool keyless_hash = false;
244  bool streaming_top_n = false;
245  int8_t group_col_compact_width = 0;
246  int32_t idx_target_as_key = -1;
247  auto output_columnar = output_columnar_hint;
248  std::vector<int64_t> target_groupby_indices;
249 
250  switch (col_range_info.hash_type_) {
252  if (render_info) {
253  render_info->setInSituDataIfUnset(false);
254  }
255  // keyless hash: whether or not group columns are stored at the beginning of the
256  // output buffer
257  keyless_hash =
258  (!sort_on_gpu_hint ||
260  col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
261  !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
262 
263  // if keyless, then this target index indicates whether an entry is empty or not
264  // (acts as a key)
265  idx_target_as_key = keyless_info.target_index;
266 
267  if (group_col_widths.size() > 1) {
268  // col range info max contains the expected cardinality of the output
269  entry_count = static_cast<size_t>(actual_col_range_info.max);
270  actual_col_range_info.bucket = 0;
271  } else {
272  // single column perfect hash
273  entry_count = std::max(
274  GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
275  const size_t interleaved_max_threshold{512};
276 
277  if (must_use_baseline_sort) {
278  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
279  ra_exe_unit.target_exprs);
280  col_slot_context =
281  ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
282  }
283 
284  bool has_varlen_sample_agg = false;
285  for (const auto& target_expr : ra_exe_unit.target_exprs) {
286  if (target_expr->get_contains_agg()) {
287  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
288  CHECK(agg_expr);
289  if (agg_expr->get_aggtype() == kSAMPLE &&
290  agg_expr->get_type_info().is_varlen()) {
291  has_varlen_sample_agg = true;
292  break;
293  }
294  }
295  }
296 
297  interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
298  (entry_count <= interleaved_max_threshold) &&
299  (device_type == ExecutorDeviceType::GPU) &&
301  count_distinct_descriptors) &&
302  !output_columnar;
303  }
304  break;
305  }
307  if (render_info) {
308  render_info->setInSituDataIfUnset(false);
309  }
310  entry_count = shard_count
311  ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
312  : max_groups_buffer_entry_count;
313  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
314  ra_exe_unit.target_exprs);
315  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
316 
317  group_col_compact_width =
318  output_columnar ? 8
319  : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
320 
321  actual_col_range_info =
323  break;
324  }
326  CHECK(!must_use_baseline_sort);
327 
328  if (streaming_top_n_hint && use_streaming_top_n(ra_exe_unit, output_columnar)) {
329  streaming_top_n = true;
330  entry_count = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit;
331  } else {
332  if (ra_exe_unit.use_bump_allocator) {
333  output_columnar = false;
334  entry_count = 0;
335  } else {
336  entry_count = ra_exe_unit.scan_limit
337  ? static_cast<size_t>(ra_exe_unit.scan_limit)
338  : max_groups_buffer_entry_count;
339  }
340  }
341 
342  const auto catalog = executor->getCatalog();
343  CHECK(catalog);
344  target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
345  ? target_expr_proj_indices(ra_exe_unit, *catalog)
346  : std::vector<int64_t>{};
347 
348  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
349  break;
350  }
351  default:
352  UNREACHABLE() << "Unknown query type";
353  }
354 
355  return std::make_unique<QueryMemoryDescriptor>(
356  executor,
357  ra_exe_unit,
358  query_infos,
359  allow_multifrag,
360  keyless_hash,
361  interleaved_bins_on_gpu,
362  idx_target_as_key,
363  actual_col_range_info,
364  col_slot_context,
365  group_col_widths,
366  group_col_compact_width,
367  target_groupby_indices,
368  entry_count,
369  count_distinct_descriptors,
370  sort_on_gpu_hint,
371  output_columnar,
372  render_info && render_info->isPotentialInSituRender(),
373  must_use_baseline_sort,
374  streaming_top_n);
375 }
std::vector< Analyzer::Expr * > target_exprs
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
std::vector< int64_t > target_expr_proj_indices(const RelAlgExecutionUnit &ra_exe_unit, const Catalog_Namespace::Catalog &cat)
const bool keyless
bool setInSituDataIfUnset(const bool is_in_situ_data)
Definition: RenderInfo.cpp:98
QueryDescriptionType hash_type_
#define UNREACHABLE()
Definition: Logger.h:241
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
static int8_t pick_target_compact_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
const int32_t target_index
std::vector< int64_t > target_expr_group_by_indices(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs, const std::vector< Analyzer::Expr * > &target_exprs)
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
bool isPotentialInSituRender() const
Definition: RenderInfo.cpp:64
#define CHECK(condition)
Definition: Logger.h:197
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
if(yyssp >=yyss+yystacksize-1)
int8_t pick_baseline_key_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::interleavedBins ( const ExecutorDeviceType  device_type) const

Definition at line 1047 of file QueryMemoryDescriptor.cpp.

References GPU, and interleaved_bins_on_gpu_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), getBufferSizeBytes(), QueryExecutionContext::groupBufferToResults(), QueryMemoryInitializer::initGroupByBuffer(), and QueryMemoryInitializer::QueryMemoryInitializer().

1047  {
1048  return interleaved_bins_on_gpu_ && device_type == ExecutorDeviceType::GPU;
1049 }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isGroupBy ( ) const
inline

Definition at line 196 of file QueryMemoryDescriptor.h.

References group_col_widths_.

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), init_agg_val_vec(), QueryMemoryInitializer::initColumnsPerRow(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

196 { return !group_col_widths_.empty(); }
std::vector< int8_t > group_col_widths_

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed ( ) const

Definition at line 997 of file QueryMemoryDescriptor.cpp.

References g_cluster, output_columnar_, Projection, and query_desc_type_.

Referenced by TargetExprCodegen::codegenAggregate(), TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(), TargetExprCodegenBuilder::codegenSlotEmptyKey(), init_agg_val_vec(), ResultSet::makeTargetValue(), QueryMemoryDescriptor(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceOneSlotSingleValue(), and setOutputColumnar().

997  {
998  // In distributed mode, result sets are serialized using rowwise iterators, so we use
999  // consistent slot widths for now
1000  return output_columnar_ && !g_cluster &&
1001  (query_desc_type_ == QueryDescriptionType::Projection);
1002 }
QueryDescriptionType query_desc_type_
bool g_cluster

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash ( ) const
inline

Definition at line 171 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), getQueryDescriptionType(), and GroupByPerfectHash.

Referenced by GroupByAndAggregate::codegenGroupBy(), and ResultSet::getTargetValueFromBufferRowwise().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isWarpSyncRequired ( const ExecutorDeviceType  device_type) const

Definition at line 1052 of file QueryMemoryDescriptor.cpp.

References CHECK, executor_, and GPU.

Referenced by query_group_by_template_impl().

1053  {
1054  if (device_type != ExecutorDeviceType::GPU) {
1055  return false;
1056  } else {
1057  auto cuda_mgr = executor_->getCatalog()->getDataMgr().getCudaMgr();
1058  CHECK(cuda_mgr);
1059  return cuda_mgr->isArchVoltaOrGreaterForAll();
1060  }
1061 }
#define CHECK(condition)
Definition: Logger.h:197

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::lazyInitGroups ( const ExecutorDeviceType  device_type) const

Definition at line 1042 of file QueryMemoryDescriptor.cpp.

References count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, and render_output_.

Referenced by create_dev_group_by_buffers(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().

1042  {
1043  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
1044  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1045 }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static bool QueryMemoryDescriptor::many_entries ( const int64_t  max_val,
const int64_t  min_val,
const int64_t  bucket 
)
inline static

Definition at line 143 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory().

145  {
146  return max_val - min_val > 10000 * std::max(bucket, int64_t(1));
147  }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::mustUseBaselineSort ( ) const
inline

Definition at line 272 of file QueryMemoryDescriptor.h.

References must_use_baseline_sort_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::operator== ( const QueryMemoryDescriptor &  other) const

Definition at line 554 of file QueryMemoryDescriptor.cpp.

References bucket_, col_slot_context_, count_distinct_descriptors_, force_4byte_float_, group_col_compact_width_, group_col_widths_, has_nulls_, i, idx_target_as_key_, interleaved_bins_on_gpu_, keyless_hash_, max_val_, min_val_, output_columnar_, query_desc_type_, sort_on_gpu_, and target_groupby_indices_.

554  {
555  // Note that this method does not check ptr reference members (e.g. executor_) or
556  // entry_count_
557  if (query_desc_type_ != other.query_desc_type_) {
558  return false;
559  }
560  if (keyless_hash_ != other.keyless_hash_) {
561  return false;
562  }
563  if (interleaved_bins_on_gpu_ != other.interleaved_bins_on_gpu_) {
564  return false;
565  }
566  if (idx_target_as_key_ != other.idx_target_as_key_) {
567  return false;
568  }
569  if (force_4byte_float_ != other.force_4byte_float_) {
570  return false;
571  }
572  if (group_col_widths_ != other.group_col_widths_) {
573  return false;
574  }
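575  if (group_col_compact_width_ != other.group_col_compact_width_) {
576  return false;
577  }
578  if (target_groupby_indices_ != other.target_groupby_indices_) {
579  return false;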
580  }
581  if (min_val_ != other.min_val_) {
582  return false;
583  }
584  if (max_val_ != other.max_val_) {
585  return false;
586  }
587  if (bucket_ != other.bucket_) {
588  return false;
589  }
590  if (has_nulls_ != other.has_nulls_) {
591  return false;
592  }
593  if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
594  return false;
595  } else {
596  // Count distinct descriptors can legitimately differ in device only.
597  for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
598  auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
599  auto count_distinct_desc = count_distinct_descriptors_[i];
600  count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
601  if (ref_count_distinct_desc != count_distinct_desc) {
602  return false;
603  }
604  }
605  }
606  if (sort_on_gpu_ != other.sort_on_gpu_) {
607  return false;
608  }
609  if (output_columnar_ != other.output_columnar_) {
610  return false;
611  }
612  if (col_slot_context_ != other.col_slot_context_) {
613  return false;
614  }
615  return true;
616 }
std::vector< int64_t > target_groupby_indices_
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
int8_t QueryMemoryDescriptor::pick_target_compact_width ( const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const int8_t  crt_min_byte_width 
)
static

Definition at line 653 of file QueryMemoryDescriptor.cpp.

References CHECK, CHECK_EQ, g_bigint_count, get_col_byte_widths(), Analyzer::UOper::get_operand(), Analyzer::Expr::get_type_info(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, anonymous_namespace{QueryMemoryDescriptor.cpp}::is_int_and_no_bigger_than(), kCOUNT, kENCODING_DICT, kUNNEST, and RelAlgExecutionUnit::target_exprs.

656  {
657  if (g_bigint_count) {
658  return sizeof(int64_t);
659  }
660  int8_t compact_width{0};
661  auto col_it = ra_exe_unit.input_col_descs.begin();
662  int unnest_array_col_id{std::numeric_limits<int>::min()};
663  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
664  const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
665  if (uoper && uoper->get_optype() == kUNNEST) {
666  const auto& arg_ti = uoper->get_operand()->get_type_info();
667  CHECK(arg_ti.is_array());
668  const auto& elem_ti = arg_ti.get_elem_type();
669  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
670  unnest_array_col_id = (*col_it)->getColId();
671  } else {
672  compact_width = crt_min_byte_width;
673  break;
674  }
675  }
676  ++col_it;
677  }
678  if (!compact_width &&
679  (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
680  compact_width = crt_min_byte_width;
681  }
682  if (!compact_width) {
683  col_it = ra_exe_unit.input_col_descs.begin();
684  std::advance(col_it, ra_exe_unit.groupby_exprs.size());
685  for (const auto target : ra_exe_unit.target_exprs) {
686  const auto& ti = target->get_type_info();
687  const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
688  if (agg && agg->get_arg()) {
689  compact_width = crt_min_byte_width;
690  break;
691  }
692 
693  if (agg) {
694  CHECK_EQ(kCOUNT, agg->get_aggtype());
695  CHECK(!agg->get_is_distinct());
696  ++col_it;
697  continue;
698  }
699 
700  if (is_int_and_no_bigger_than(ti, 4) ||
701  (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
702  ++col_it;
703  continue;
704  }
705 
706  const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
707  if (uoper && uoper->get_optype() == kUNNEST &&
708  (*col_it)->getColId() == unnest_array_col_id) {
709  const auto arg_ti = uoper->get_operand()->get_type_info();
710  CHECK(arg_ti.is_array());
711  const auto& elem_ti = arg_ti.get_elem_type();
712  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
713  ++col_it;
714  continue;
715  }
716  }
717 
718  compact_width = crt_min_byte_width;
719  break;
720  }
721  }
722  if (!compact_width) {
723  size_t total_tuples{0};
724  for (const auto& qi : query_infos) {
725  total_tuples += qi.info.getNumTuples();
726  }
727  return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
728  unnest_array_col_id != std::numeric_limits<int>::min()
729  ? 4
730  : crt_min_byte_width;
731  } else {
732  // TODO(miyu): relax this condition to allow more cases just w/o padding
733  for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs)) {
734  compact_width = std::max(compact_width, wid);
735  }
736  return compact_width;
737  }
738 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:205
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_bigint_count
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:78
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
const Expr * get_operand() const
Definition: Analyzer.h:371
Definition: sqldefs.h:76
bool is_int_and_no_bigger_than(const SQLTypeInfo &ti, const size_t byte_width)
#define CHECK(condition)
Definition: Logger.h:197
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs

+ Here is the call graph for this function:

std::string QueryMemoryDescriptor::queryDescTypeToString ( ) const

Definition at line 1120 of file QueryMemoryDescriptor.cpp.

References Estimator, GroupByBaselineHash, GroupByPerfectHash, NonGroupedAggregate, Projection, query_desc_type_, and UNREACHABLE.

Referenced by reductionKey().

1120  {
1121  switch (query_desc_type_) {
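1122  case QueryDescriptionType::GroupByPerfectHash:
1123  return "Perfect Hash";
1124  case QueryDescriptionType::GroupByBaselineHash:
1125  return "Baseline Hash";
1126  case QueryDescriptionType::Projection:
1127  return "Projection";
1128  case QueryDescriptionType::NonGroupedAggregate:
1129  return "Non-grouped Aggregate";
1130  case QueryDescriptionType::Estimator:
1131  return "Estimator";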
1132  default:
1133  UNREACHABLE();
1134  }
1135  return "";
1136 }
#define UNREACHABLE()
Definition: Logger.h:241
QueryDescriptionType query_desc_type_

+ Here is the caller graph for this function:

std::string QueryMemoryDescriptor::reductionKey ( ) const

Definition at line 1160 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, getEffectiveKeyWidth(), getGroupbyColCount(), getTargetGroupbyIndex(), getTargetIdxForKey(), join(), keyless_hash_, queryDescTypeToString(), targetGroupbyIndicesSize(), to_string(), ColSlotContext::toString(), and toString().

Referenced by ResultSetReductionJIT::cacheKey(), and toString().

1160  {
1161  std::string str;
1162  str += "Query Memory Descriptor State\n";
1163  str += "\tQuery Type: " + queryDescTypeToString() + "\n";
1164  str +=
1165  "\tKeyless Hash: " + ::toString(keyless_hash_) +
1166  (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
1167  : "") +
1168  "\n";
1169  str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
1170  str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
1171  const auto group_indices_size = targetGroupbyIndicesSize();
1172  if (group_indices_size) {
1173  std::vector<std::string> group_indices_strings;
1174  for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
1175  group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
1176  }
1177  str += "\tTarget group by indices: " +
1178  boost::algorithm::join(group_indices_strings, ",") + "\n";
1179  }
1180  str += "\t" + col_slot_context_.toString();
1181  return str;
1182 }
int64_t getTargetGroupbyIndex(const size_t target_idx) const
std::string toString() const
std::string join(T const &container, std::string const &delim)
size_t getEffectiveKeyWidth() const
std::string to_string(char const *&&v)
size_t getGroupbyColCount() const
size_t targetGroupbyIndicesSize() const
std::string toString() const
std::string queryDescTypeToString() const
int32_t getTargetIdxForKey() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::resetGroupColWidths ( const std::vector< int8_t > &  new_group_col_widths)
inline protected

Definition at line 331 of file QueryMemoryDescriptor.h.

References group_col_widths_.

331  {
332  group_col_widths_ = new_group_col_widths;
333  }
std::vector< int8_t > group_col_widths_
void QueryMemoryDescriptor::setAllTargetGroupbyIndices ( std::vector< int64_t >  group_by_indices)
inline

Definition at line 232 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

232  {
233  target_groupby_indices_ = group_by_indices;
234  }
std::vector< int64_t > target_groupby_indices_
void QueryMemoryDescriptor::setEntryCount ( const size_t  val)
inline

Definition at line 246 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by Executor::executePlanWithGroupBy(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::updateEntryCount(), and ResultSet::updateStorageEntryCount().

246 { entry_count_ = val; }

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setForceFourByteFloat ( const bool  val)
inline

Definition at line 277 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

void QueryMemoryDescriptor::setGroupColCompactWidth ( const int8_t  val)
inline

Definition at line 198 of file QueryMemoryDescriptor.h.

References group_col_compact_width_.

void QueryMemoryDescriptor::setHasInterleavedBinsOnGpu ( const bool  val)
inline

Definition at line 180 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

void QueryMemoryDescriptor::setHasKeylessHash ( const bool  val)
inline

Definition at line 177 of file QueryMemoryDescriptor.h.

References keyless_hash_.

void QueryMemoryDescriptor::setOutputColumnar ( const bool  val)

Definition at line 986 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, isLogicalSizedColumnsAllowed(), output_columnar_, and ColSlotContext::setAllSlotsPaddedSizeToLogicalSize().

Referenced by Executor::executeTableFunction(), TableFunctionExecutionContext::launchCpuCode(), and TableFunctionExecutionContext::launchGpuCode().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setQueryDescriptionType ( const QueryDescriptionType  val)
inline

Definition at line 170 of file QueryMemoryDescriptor.h.

References query_desc_type_.

170 { query_desc_type_ = val; }
QueryDescriptionType query_desc_type_
void QueryMemoryDescriptor::setTargetIdxForKey ( const int32_t  val)
inline

Definition at line 183 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

bool QueryMemoryDescriptor::sortOnGpu ( ) const
inline

Definition at line 262 of file QueryMemoryDescriptor.h.

References sort_on_gpu_.

Referenced by alignPaddedSlots(), QueryExecutionContext::launchGpuCode(), ExecutionKernel::runImpl(), and use_speculative_top_n().

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::targetGroupbyIndicesSize ( ) const
inline
size_t QueryMemoryDescriptor::targetGroupbyNegativeIndicesSize ( ) const
inline

Definition at line 237 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

237  {
238  return std::count_if(
239  target_groupby_indices_.begin(),
240  target_groupby_indices_.end(),
241  [](const int64_t& target_group_by_index) { return target_group_by_index < 0; });
242  }
std::vector< int64_t > target_groupby_indices_
bool QueryMemoryDescriptor::threadsShareMemory ( ) const
std::string QueryMemoryDescriptor::toString ( ) const

Definition at line 1138 of file QueryMemoryDescriptor.cpp.

References allow_multifrag_, blocksShareMemory(), bucket_, entry_count_, GPU, interleaved_bins_on_gpu_, lazyInitGroups(), max_val_, min_val_, must_use_baseline_sort_, output_columnar_, reductionKey(), render_output_, sort_on_gpu_, threadsShareMemory(), to_string(), use_streaming_top_n_, and usesGetGroupValueFast().

Referenced by Executor::createKernels(), and reductionKey().

1138  {
1139  auto str = reductionKey();
1140  str += "\tAllow Multifrag: " + ::toString(allow_multifrag_) + "\n";
1141  str += "\tInterleaved Bins on GPU: " + ::toString(interleaved_bins_on_gpu_) + "\n";
1142  str += "\tBlocks Share Memory: " + ::toString(blocksShareMemory()) + "\n";
1143  str += "\tThreads Share Memory: " + ::toString(threadsShareMemory()) + "\n";
1144  str += "\tUses Fast Group Values: " + ::toString(usesGetGroupValueFast()) + "\n";
1145  str +=
1146  "\tLazy Init Groups (GPU): " + ::toString(lazyInitGroups(ExecutorDeviceType::GPU)) +
1147  "\n";
1148  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
1149  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
1150  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
1151  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
1152  str += "\tSort on GPU: " + ::toString(sort_on_gpu_) + "\n";
1153  str += "\tUse Streaming Top N: " + ::toString(use_streaming_top_n_) + "\n";
1154  str += "\tOutput Columnar: " + ::toString(output_columnar_) + "\n";
1155  str += "\tRender Output: " + ::toString(render_output_) + "\n";
1156  str += "\tUse Baseline Sort: " + ::toString(must_use_baseline_sort_) + "\n";
1157  return str;
1158 }
std::string toString() const
std::string to_string(char const *&&v)
bool lazyInitGroups(const ExecutorDeviceType) const
std::string reductionKey() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static TResultSetBufferDescriptor QueryMemoryDescriptor::toThrift ( const QueryMemoryDescriptor )
static
int8_t QueryMemoryDescriptor::updateActualMinByteWidth ( const int8_t  actual_min_byte_width) const

Definition at line 1096 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getMinPaddedByteSize().

1097  {
1098  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
1099 }
int8_t getMinPaddedByteSize(const int8_t actual_min_byte_width) const

+ Here is the call graph for this function:

void QueryMemoryDescriptor::useConsistentSlotWidthSize ( const int8_t  slot_width_size)

Definition at line 1087 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setAllSlotsSize().

1087  {
1088  col_slot_context_.setAllSlotsSize(slot_width_size);
1089 }
void setAllSlotsSize(const int8_t slot_width_size)

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::usesGetGroupValueFast ( ) const

Definition at line 1015 of file QueryMemoryDescriptor.cpp.

References getGroupbyColCount(), GroupByPerfectHash, and query_desc_type_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), and toString().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::useStreamingTopN ( ) const
inline

Definition at line 268 of file QueryMemoryDescriptor.h.

References use_streaming_top_n_.

Referenced by GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenOutputSlot(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), QueryMemoryInitializer::initGroupByBuffer(), QueryExecutionContext::launchCpuCode(), and QueryExecutionContext::launchGpuCode().

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class QueryExecutionContext
friend

Definition at line 373 of file QueryMemoryDescriptor.h.

Referenced by getQueryExecutionContext().

friend class ResultSet
friend

Definition at line 372 of file QueryMemoryDescriptor.h.

Member Data Documentation

bool QueryMemoryDescriptor::allow_multifrag_
private

Definition at line 337 of file QueryMemoryDescriptor.h.

Referenced by toString().

int64_t QueryMemoryDescriptor::bucket_
private

Definition at line 354 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getBucket(), operator==(), and toString().

size_t QueryMemoryDescriptor::entry_count_
private
const Executor* QueryMemoryDescriptor::executor_
private
bool QueryMemoryDescriptor::force_4byte_float_
private

Definition at line 364 of file QueryMemoryDescriptor.h.

Referenced by forceFourByteFloat(), operator==(), and setForceFourByteFloat().

int8_t QueryMemoryDescriptor::group_col_compact_width_
private
bool QueryMemoryDescriptor::has_nulls_
private

Definition at line 355 of file QueryMemoryDescriptor.h.

Referenced by hasNulls(), and operator==().

int32_t QueryMemoryDescriptor::idx_target_as_key_
private

Definition at line 341 of file QueryMemoryDescriptor.h.

Referenced by getTargetIdxForKey(), operator==(), and setTargetIdxForKey().

bool QueryMemoryDescriptor::interleaved_bins_on_gpu_
private
bool QueryMemoryDescriptor::is_table_function_
private

Definition at line 361 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory().

bool QueryMemoryDescriptor::keyless_hash_
private
int64_t QueryMemoryDescriptor::max_val_
private

Definition at line 353 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMaxVal(), operator==(), and toString().

int64_t QueryMemoryDescriptor::min_val_
private

Definition at line 351 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMinVal(), operator==(), and toString().

bool QueryMemoryDescriptor::must_use_baseline_sort_
private

Definition at line 360 of file QueryMemoryDescriptor.h.

Referenced by mustUseBaselineSort(), and toString().

bool QueryMemoryDescriptor::render_output_
private

Definition at line 359 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), lazyInitGroups(), and toString().

bool QueryMemoryDescriptor::sort_on_gpu_
private

Definition at line 357 of file QueryMemoryDescriptor.h.

Referenced by operator==(), QueryMemoryDescriptor(), sortOnGpu(), and toString().

std::vector<int64_t> QueryMemoryDescriptor::target_groupby_indices_
private
bool QueryMemoryDescriptor::use_streaming_top_n_
private

The documentation for this class was generated from the following files: