_streaming_top_n_8cpp_source.html

 /*

  * Copyright 2022 HEAVY.AI, Inc.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #include "StreamingTopN.h"

 #include "RelAlgExecutor.h"

 #include "Shared/checked_alloc.h"

 #include "TopKSort.h"


 namespace streaming_top_n {

 /**
  * @brief Total byte size of the buffer holding the heaps of all threads.
  *
  * Each thread gets (1 + n) 64-bit header words followed by room for n rows,
  * where one row occupies row_size / sizeof(int64_t) words. The division is an
  * integer division, so a row_size that is not a multiple of sizeof(int64_t)
  * is rounded down.
  *
  * @param row_size     Size of one row in bytes.
  * @param n            Number of rows kept per thread.
  * @param thread_count Number of threads, i.e. number of heaps.
  * @return Buffer size in bytes.
  */
 size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count) {
   constexpr size_t kWordBytes = sizeof(int64_t);
   const size_t words_per_row = row_size / kWordBytes;
   const size_t words_per_thread = 1 + n + words_per_row * n;
   return words_per_thread * thread_count * kWordBytes;
 }

 /**
  * @brief Byte offset at which the row storage starts inside the heaps buffer.
  *
  * This is the size of the (1 + n) 64-bit header words of every thread.
  * NOTE(review): presumably one counter word plus n heap entries per thread --
  * confirm against the code that fills the heaps.
  *
  * @param n            Number of rows kept per thread.
  * @param thread_count Number of threads, i.e. number of heaps.
  * @return Offset in bytes from the start of the heaps buffer.
  */
 size_t get_rows_offset_of_heaps(const size_t n, const size_t thread_count) {
   const size_t header_words_per_thread = 1 + n;
   return header_words_per_thread * thread_count * sizeof(int64_t);
 }

 /**
  * @brief Copies the row storage that follows the heap headers into a new vector.
  *
  * @param heaps        Start of the heaps buffer.
  * @param heaps_size   Size of the whole heaps buffer in bytes. Must not be
  *                     smaller than get_rows_offset_of_heaps(n, thread_count):
  *                     the subtraction below is unsigned and would wrap.
  * @param n            Number of rows kept per thread.
  * @param thread_count Number of threads, i.e. number of heaps.
  * @return A copy of the bytes in [rows offset, heaps_size); empty when the
  *         buffer holds headers only.
  */
 std::vector<int8_t> get_rows_copy_from_heaps(const int64_t* heaps,
                                              const size_t heaps_size,
                                              const size_t n,
                                              const size_t thread_count) {
   const size_t rows_offset = streaming_top_n::get_rows_offset_of_heaps(n, thread_count);
   const size_t rows_bytes = heaps_size - rows_offset;
   std::vector<int8_t> rows_copy(rows_bytes);
   // Indexing element 0 of an empty vector is undefined behaviour, so only
   // touch the storage when there is something to copy.
   if (!rows_copy.empty()) {
     const auto* rows_src = reinterpret_cast<const int8_t*>(heaps) + rows_offset;
     std::memcpy(rows_copy.data(), rows_src, rows_bytes);
   }
   return rows_copy;
 }

 }  // namespace streaming_top_n


 /**
  * @brief Slot index at which the target with index target_idx begins.
  *
  * Starts at slot 0 and advances over every target expression that precedes
  * target_idx, letting advance_slot() decide how far each one moves the index.
  *
  * @param target_exprs Target expressions of the query, in output order.
  * @param target_idx   Index into target_exprs of the target of interest; only
  *                     the entries before it are inspected.
  * @return The slot index reached after the preceding targets.
  */
 size_t get_heap_key_slot_index(const std::vector<Analyzer::Expr*>& target_exprs,
                                const size_t target_idx) {
   size_t slot = 0;
   size_t pos = 0;
   while (pos < target_idx) {
     auto preceding_info = get_target_info(target_exprs[pos], g_bigint_count);
     // Third argument is advance_slot's separate_varlen_storage flag.
     slot = advance_slot(slot, preceding_info, false);
     ++pos;
   }
   return slot;
 }


 #ifdef HAVE_CUDA

 /**
  * @brief Extracts the top-n rows from the per-thread heaps stored in a device
  *        buffer by delegating to pop_n_rows_from_merged_heaps_gpu().
  *
  * Preconditions enforced with CHECKs: the memory descriptor must not report
  * columnar output, and the sort must have exactly one order entry.
  *
  * @param data_mgr         Data manager forwarded to the GPU routine.
  * @param dev_heaps_buffer Heaps buffer forwarded to the GPU routine.
  * @param ra_exe_unit      Execution unit supplying the sort info and targets.
  * @param query_mem_desc   Descriptor supplying row, key and slot geometry.
  * @param thread_count     Number of heaps in the buffer.
  * @param device_id        Device the buffer lives on.
  * @return Whatever pop_n_rows_from_merged_heaps_gpu() returns for these inputs.
  */
 std::vector<int8_t> pick_top_n_rows_from_dev_heaps(
     Data_Namespace::DataMgr* data_mgr,
     const int64_t* dev_heaps_buffer,
     const RelAlgExecutionUnit& ra_exe_unit,
     const QueryMemoryDescriptor& query_mem_desc,
     const size_t thread_count,
     const int device_id) {
   CHECK(!query_mem_desc.canOutputColumnar());
   CHECK_EQ(ra_exe_unit.sort_info.order_entries.size(), size_t(1));
   const auto& only_oe = ra_exe_unit.sort_info.order_entries.back();
   // tle_no is 1-based, the target list is 0-based.
   const auto oe_col_idx = only_oe.tle_no - 1;
   // Rows per heap: offset plus limit, where a missing limit counts as 0.
   const auto n = ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit.value_or(0);
   const auto group_key_bytes = query_mem_desc.getEffectiveKeyWidth();
   const PodOrderEntry pod_oe{only_oe.tle_no, only_oe.is_desc, only_oe.nulls_first};
   // A target can span more than one slot, so the slot holding the order-by
   // column may differ from the target index.
   const auto key_slot_idx = get_heap_key_slot_index(ra_exe_unit.target_exprs, oe_col_idx);
   GroupByBufferLayoutInfo oe_layout{
       n * thread_count,
       query_mem_desc.getColOffInBytes(key_slot_idx),
       // Offset and width must describe the same slot, so both use the slot
       // index rather than the target index.
       static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(key_slot_idx)),
       query_mem_desc.getRowSize(),
       get_target_info(ra_exe_unit.target_exprs[oe_col_idx], g_bigint_count),
       // NOTE(review): presumably "no group-by index" -- confirm against
       // GroupByBufferLayoutInfo.
       -1};
   return pop_n_rows_from_merged_heaps_gpu(
       data_mgr,
       dev_heaps_buffer,
       query_mem_desc.getBufferSizeBytes(
           ra_exe_unit, thread_count, ExecutorDeviceType::GPU),
       n,
       pod_oe,
       oe_layout,
       group_key_bytes,
       thread_count,
       device_id);
 }

 #endif  // HAVE_CUDA

RelAlgExecutionUnit::target_exprs
std::vector< Analyzer::Expr * > target_exprs
Definition: RelAlgExecutionUnit.h:172

CHECK_EQ
#define CHECK_EQ(x, y)
Definition: Logger.h:301

QueryMemoryDescriptor::getBufferSizeBytes
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
Definition: QueryMemoryDescriptor.cpp:1047

TopKSort.h

StreamingTopN.h
Streaming Top N algorithm.

QueryMemoryDescriptor
Definition: QueryMemoryDescriptor.h:68

streaming_top_n::get_rows_offset_of_heaps
size_t get_rows_offset_of_heaps(const size_t n, const size_t thread_count)
Definition: StreamingTopN.cpp:29

QueryMemoryDescriptor::getEffectiveKeyWidth
size_t getEffectiveKeyWidth() const
Definition: QueryMemoryDescriptor.h:347

pop_n_rows_from_merged_heaps_gpu
std::vector< int8_t > pop_n_rows_from_merged_heaps_gpu(Data_Namespace::DataMgr *data_mgr, const int64_t *dev_heaps, const size_t heaps_size, const size_t n, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t group_key_bytes, const size_t thread_count, const int device_id)
Definition: TopKSort.cu:303

get_heap_key_slot_index
size_t get_heap_key_slot_index(const std::vector< Analyzer::Expr * > &target_exprs, const size_t target_idx)
Definition: StreamingTopN.cpp:47

get_target_info
TargetInfo get_target_info(const Analyzer::Expr *target_expr, const bool bigint_count)
Definition: TargetInfo.h:92

QueryMemoryDescriptor::getRowSize
size_t getRowSize() const
Definition: QueryMemoryDescriptor.cpp:835

ExecutorDeviceType::GPU

advance_slot
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
Definition: ResultSetBufferAccessors.h:75

RelAlgExecutionUnit::sort_info
const SortInfo sort_info
Definition: RelAlgExecutionUnit.h:175

SortInfo::offset
size_t offset
Definition: RelAlgExecutionUnit.h:155

SortInfo::limit
std::optional< size_t > limit
Definition: RelAlgExecutionUnit.h:154

SortInfo::order_entries
std::list< Analyzer::OrderEntry > order_entries
Definition: RelAlgExecutionUnit.h:152

g_bigint_count
bool g_bigint_count
Definition: GroupByAndAggregate.cpp:54

QueryMemoryDescriptor::getPaddedSlotWidthBytes
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
Definition: QueryMemoryDescriptor.cpp:1189

RelAlgExecutor.h

PodOrderEntry::tle_no
int tle_no
Definition: ResultSetSortImpl.h:24

Data_Namespace::DataMgr
Definition: DataMgr.h:125

streaming_top_n::get_rows_copy_from_heaps
std::vector< int8_t > get_rows_copy_from_heaps(const int64_t *heaps, const size_t heaps_size, const size_t n, const size_t thread_count)
Definition: StreamingTopN.cpp:33

checked_alloc.h

QueryMemoryDescriptor::canOutputColumnar
bool canOutputColumnar() const
Definition: QueryMemoryDescriptor.cpp:1241

streaming_top_n::get_heap_size
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
Definition: StreamingTopN.cpp:24

CHECK
#define CHECK(condition)
Definition: Logger.h:291

PodOrderEntry
Definition: ResultSetSortImpl.h:23

GroupByBufferLayoutInfo
Definition: ResultSetSortImpl.h:29

anonymous_namespace{Utm.h}::n
constexpr double n
Definition: Utm.h:38

QueryMemoryDescriptor::getColOffInBytes
size_t getColOffInBytes(const size_t col_idx) const
Definition: QueryMemoryDescriptor.cpp:905

query_mem_desc
query_mem_desc
Definition: QueryMemoryInitializer.cpp:479

RelAlgExecutionUnit
Definition: RelAlgExecutionUnit.h:165