OmniSciDB  b24e664e58
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TopKSort.cu File Reference
#include "BufferEntryUtils.h"
#include "GpuMemUtils.h"
#include "ResultSetBufferAccessors.h"
#include "SortUtils.cuh"
#include "StreamingTopN.h"
#include "TopKSort.h"
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/partition.h>
#include <thrust/sort.h>
#include <iostream>
+ Include dependency graph for TopKSort.cu:

Go to the source code of this file.

Classes

struct  is_taken_entry< K, I >
 
struct  is_null_order_entry< K, I >
 
struct  KeyFetcher< K, I >
 
struct  KeyReseter< K >
 
struct  RowFetcher< I >
 

Functions

template<typename ForwardIterator >
ForwardIterator partition_by_null (ForwardIterator first, ForwardIterator last, const int64_t null_val, const bool nulls_first, const int8_t *rows_ptr, const GroupByBufferLayoutInfo &layout)
 
template<class K , class I >
void collect_order_entry_column (thrust::device_ptr< K > &d_oe_col_buffer, const int8_t *d_src_buffer, const thrust::device_ptr< I > &d_idx_first, const size_t idx_count, const size_t oe_offset, const size_t oe_stride)
 
template<class K , class I >
void sort_indices_by_key (thrust::device_ptr< I > d_idx_first, const size_t idx_count, const thrust::device_ptr< K > &d_key_buffer, const bool desc, ThrustAllocator &allocator)
 
template<class I = int32_t>
void do_radix_sort (thrust::device_ptr< I > d_idx_first, const size_t idx_count, const int8_t *d_src_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, ThrustAllocator &allocator)
 
template<typename DerivedPolicy >
void reset_keys_in_row_buffer (const thrust::detail::execution_policy_base< DerivedPolicy > &exec, int8_t *row_buffer, const size_t key_width, const size_t row_size, const size_t first, const size_t last)
 
std::vector< int8_t > pop_n_rows_from_merged_heaps_gpu (Data_Namespace::DataMgr *data_mgr, const int64_t *dev_heaps, const size_t heaps_size, const size_t n, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t group_key_bytes, const size_t thread_count, const int device_id)
 

Function Documentation

template<class K , class I >
void collect_order_entry_column ( thrust::device_ptr< K > &  d_oe_col_buffer,
const int8_t *  d_src_buffer,
const thrust::device_ptr< I > &  d_idx_first,
const size_t  idx_count,
const size_t  oe_offset,
const size_t  oe_stride 
)

Definition at line 138 of file TopKSort.cu.

Referenced by do_radix_sort().

143  {
144  thrust::for_each(thrust::make_counting_iterator(size_t(0)),
145  thrust::make_counting_iterator(idx_count),
146  KeyFetcher<K, I>(thrust::raw_pointer_cast(d_oe_col_buffer),
147  d_src_buffer + oe_offset,
148  oe_stride,
149  thrust::raw_pointer_cast(d_idx_first)));
150 }

+ Here is the caller graph for this function:

template<class I = int32_t>
void do_radix_sort ( thrust::device_ptr< I >  d_idx_first,
const size_t  idx_count,
const int8_t *  d_src_buffer,
const PodOrderEntry oe,
const GroupByBufferLayoutInfo layout,
ThrustAllocator allocator 
)

Definition at line 171 of file TopKSort.cu.

References CHECK(), GroupByBufferLayoutInfo::col_bytes, GroupByBufferLayoutInfo::col_off, collect_order_entry_column(), PodOrderEntry::is_desc, GroupByBufferLayoutInfo::oe_target_info, GroupByBufferLayoutInfo::row_bytes, sort_indices_by_key(), and TargetInfo::sql_type.

Referenced by pop_n_rows_from_merged_heaps_gpu().

176  {
177  const auto& oe_type = layout.oe_target_info.sql_type;
178  if (oe_type.is_fp()) {
179  switch (layout.col_bytes) {
180  case 4: {
181  auto d_oe_buffer = get_device_ptr<float>(idx_count, allocator);
182  collect_order_entry_column(d_oe_buffer,
183  d_src_buffer,
184  d_idx_first,
185  idx_count,
186  layout.col_off,
187  layout.row_bytes);
188  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
189  break;
190  }
191  case 8: {
192  auto d_oe_buffer = get_device_ptr<double>(idx_count, allocator);
193  collect_order_entry_column(d_oe_buffer,
194  d_src_buffer,
195  d_idx_first,
196  idx_count,
197  layout.col_off,
198  layout.row_bytes);
199  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
200  break;
201  }
202  default:
203  CHECK(false);
204  }
205  return;
206  }
207  CHECK(oe_type.is_number() || oe_type.is_time());
208  switch (layout.col_bytes) {
209  case 4: {
210  auto d_oe_buffer = get_device_ptr<int32_t>(idx_count, allocator);
211  collect_order_entry_column(d_oe_buffer,
212  d_src_buffer,
213  d_idx_first,
214  idx_count,
215  layout.col_off,
216  layout.row_bytes);
217  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
218  break;
219  }
220  case 8: {
221  auto d_oe_buffer = get_device_ptr<int64_t>(idx_count, allocator);
222  collect_order_entry_column(d_oe_buffer,
223  d_src_buffer,
224  d_idx_first,
225  idx_count,
226  layout.col_off,
227  layout.row_bytes);
228  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
229  break;
230  }
231  default:
232  CHECK(false);
233  }
234 }
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
CHECK(cgen_state)
bool is_desc
const TargetInfo oe_target_info
void collect_order_entry_column(thrust::device_ptr< K > &d_oe_col_buffer, const int8_t *d_src_buffer, const thrust::device_ptr< I > &d_idx_first, const size_t idx_count, const size_t oe_offset, const size_t oe_stride)
Definition: TopKSort.cu:138
void sort_indices_by_key(thrust::device_ptr< I > d_idx_first, const size_t idx_count, const thrust::device_ptr< K > &d_key_buffer, const bool desc, ThrustAllocator &allocator)
Definition: TopKSort.cu:153

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename ForwardIterator >
ForwardIterator partition_by_null ( ForwardIterator  first,
ForwardIterator  last,
const int64_t  null_val,
const bool  nulls_first,
const int8_t *  rows_ptr,
const GroupByBufferLayoutInfo layout 
)

Definition at line 72 of file TopKSort.cu.

References GroupByBufferLayoutInfo::col_bytes, GroupByBufferLayoutInfo::col_off, and GroupByBufferLayoutInfo::row_bytes.

Referenced by pop_n_rows_from_merged_heaps_gpu().

77  {
78  if (nulls_first) {
79  return (layout.col_bytes == 4)
80  ? thrust::partition(
81  first,
82  last,
84  rows_ptr + layout.col_off, layout.row_bytes, null_val))
85  : thrust::partition(
86  first,
87  last,
88  is_null_order_entry<int64_t>(
89  rows_ptr + layout.col_off, layout.row_bytes, null_val));
90  } else {
91  return (layout.col_bytes == 4)
92  ? thrust::partition(
93  first,
94  last,
95  thrust::not1(is_null_order_entry<int32_t>(
96  rows_ptr + layout.col_off, layout.row_bytes, null_val)))
97  : thrust::partition(
98  first,
99  last,
100  thrust::not1(is_null_order_entry<int64_t>(
101  rows_ptr + layout.col_off, layout.row_bytes, null_val)));
102  }
103 }
Definition: TopKSort.cu:50

+ Here is the caller graph for this function:

std::vector<int8_t> pop_n_rows_from_merged_heaps_gpu ( Data_Namespace::DataMgr data_mgr,
const int64_t *  dev_heaps,
const size_t  heaps_size,
const size_t  n,
const PodOrderEntry oe,
const GroupByBufferLayoutInfo layout,
const size_t  group_key_bytes,
const size_t  thread_count,
const int  device_id 
)

Definition at line 281 of file TopKSort.cu.

References CHECK_EQ, GroupByBufferLayoutInfo::col_bytes, do_radix_sort(), streaming_top_n::get_heap_size(), streaming_top_n::get_rows_offset_of_heaps(), null_val_bit_pattern(), PodOrderEntry::nulls_first, GroupByBufferLayoutInfo::oe_target_info, partition_by_null(), reset_keys_in_row_buffer(), GroupByBufferLayoutInfo::row_bytes, and TargetInfo::sql_type.

290  {
291  const auto row_size = layout.row_bytes;
292  CHECK_EQ(heaps_size, streaming_top_n::get_heap_size(row_size, n, thread_count));
293  const int8_t* rows_ptr = reinterpret_cast<const int8_t*>(dev_heaps) +
295  const auto total_entry_count = n * thread_count;
296  ThrustAllocator thrust_allocator(data_mgr, device_id);
297  auto d_indices = get_device_ptr<int32_t>(total_entry_count, thrust_allocator);
298  thrust::sequence(d_indices, d_indices + total_entry_count);
299  auto separator = (group_key_bytes == 4)
300  ? thrust::partition(d_indices,
301  d_indices + total_entry_count,
302  is_taken_entry<int32_t>(rows_ptr, row_size))
303  : thrust::partition(d_indices,
304  d_indices + total_entry_count,
305  is_taken_entry<int64_t>(rows_ptr, row_size));
306  const size_t actual_entry_count = separator - d_indices;
307  if (!actual_entry_count) {
308  std::vector<int8_t> top_rows(row_size * n);
310  thrust::host, &top_rows[0], layout.col_bytes, row_size, 0, n);
311  return top_rows;
312  }
313 
314  const auto& oe_type = layout.oe_target_info.sql_type;
315  if (oe_type.get_notnull()) {
316  do_radix_sort(d_indices, actual_entry_count, rows_ptr, oe, layout, thrust_allocator);
317  } else {
318  auto separator = partition_by_null(d_indices,
319  d_indices + actual_entry_count,
320  null_val_bit_pattern(oe_type, false),
321  oe.nulls_first,
322  rows_ptr,
323  layout);
324  if (oe.nulls_first) {
325  const size_t null_count = separator - d_indices;
326  if (null_count < actual_entry_count) {
327  do_radix_sort(separator,
328  actual_entry_count - null_count,
329  rows_ptr,
330  oe,
331  layout,
332  thrust_allocator);
333  }
334  } else {
335  const size_t nonnull_count = separator - d_indices;
336  if (nonnull_count > 0) {
337  do_radix_sort(d_indices, nonnull_count, rows_ptr, oe, layout, thrust_allocator);
338  }
339  }
340  }
341 
342  const auto final_entry_count = std::min(n, actual_entry_count);
343  auto d_top_rows = get_device_ptr<int8_t>(row_size * n, thrust_allocator);
344  thrust::for_each(thrust::make_counting_iterator(size_t(0)),
345  thrust::make_counting_iterator(final_entry_count),
346  RowFetcher<int32_t>(thrust::raw_pointer_cast(d_top_rows),
347  rows_ptr,
348  thrust::raw_pointer_cast(d_indices),
349  row_size));
350 
351  if (final_entry_count < n) {
352  reset_keys_in_row_buffer(thrust::device,
353  thrust::raw_pointer_cast(d_top_rows),
354  layout.col_bytes,
355  row_size,
356  final_entry_count,
357  n);
358  }
359 
360  std::vector<int8_t> top_rows(row_size * n);
361  thrust::copy(d_top_rows, d_top_rows + row_size * n, top_rows.begin());
362  return top_rows;
363 }
#define CHECK_EQ(x, y)
Definition: Logger.h:198
void reset_keys_in_row_buffer(const thrust::detail::execution_policy_base< DerivedPolicy > &exec, int8_t *row_buffer, const size_t key_width, const size_t row_size, const size_t first, const size_t last)
Definition: TopKSort.cu:254
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
size_t get_rows_offset_of_heaps(const size_t n, const size_t thread_count)
Definition: TopKSort.cu:39
int64_t null_val_bit_pattern(const SQLTypeInfo &ti, const bool float_argument_input)
bool nulls_first
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
const TargetInfo oe_target_info
ForwardIterator partition_by_null(ForwardIterator first, ForwardIterator last, const int64_t null_val, const bool nulls_first, const int8_t *rows_ptr, const GroupByBufferLayoutInfo &layout)
Definition: TopKSort.cu:72
void do_radix_sort(thrust::device_ptr< I > d_idx_first, const size_t idx_count, const int8_t *d_src_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, ThrustAllocator &allocator)
Definition: TopKSort.cu:171

+ Here is the call graph for this function:

template<typename DerivedPolicy >
void reset_keys_in_row_buffer ( const thrust::detail::execution_policy_base< DerivedPolicy > &  exec,
int8_t *  row_buffer,
const size_t  key_width,
const size_t  row_size,
const size_t  first,
const size_t  last 
)

Definition at line 254 of file TopKSort.cu.

References CHECK(), EMPTY_KEY_32, and EMPTY_KEY_64.

Referenced by pop_n_rows_from_merged_heaps_gpu().

260  {
261  switch (key_width) {
262  case 4:
263  thrust::for_each(
264  exec,
265  thrust::make_counting_iterator(first),
266  thrust::make_counting_iterator(last),
267  KeyReseter<int32_t>(row_buffer, row_size, static_cast<int32_t>(EMPTY_KEY_32)));
268  break;
269  case 8:
270  thrust::for_each(
271  exec,
272  thrust::make_counting_iterator(first),
273  thrust::make_counting_iterator(last),
274  KeyReseter<int64_t>(row_buffer, row_size, static_cast<int64_t>(EMPTY_KEY_64)));
275  break;
276  default:
277  CHECK(false);
278  }
279 }
#define EMPTY_KEY_64
CHECK(cgen_state)
#define EMPTY_KEY_32

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<class K , class I >
void sort_indices_by_key ( thrust::device_ptr< I >  d_idx_first,
const size_t  idx_count,
const thrust::device_ptr< K > &  d_key_buffer,
const bool  desc,
ThrustAllocator allocator 
)

Definition at line 153 of file TopKSort.cu.

Referenced by do_radix_sort().

157  {
158  if (desc) {
159  thrust::sort_by_key(thrust::device(allocator),
160  d_key_buffer,
161  d_key_buffer + idx_count,
162  d_idx_first,
163  thrust::greater<K>());
164  } else {
165  thrust::sort_by_key(
166  thrust::device(allocator), d_key_buffer, d_key_buffer + idx_count, d_idx_first);
167  }
168 }

+ Here is the caller graph for this function: