OmniSciDB  06b3bd477c
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
TopKSort.cu File Reference
#include "BufferEntryUtils.h"
#include "GpuMemUtils.h"
#include "ResultSetBufferAccessors.h"
#include "SortUtils.cuh"
#include "StreamingTopN.h"
#include "TopKSort.h"
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/partition.h>
#include <thrust/sort.h>
#include <iostream>
+ Include dependency graph for TopKSort.cu:

Go to the source code of this file.

Classes

struct  is_taken_entry< K, I >
 
struct  is_null_order_entry< K, I >
 
struct  KeyFetcher< K, I >
 
struct  KeyReseter< K >
 
struct  RowFetcher< I >
 

Functions

template<typename ForwardIterator >
ForwardIterator partition_by_null (ForwardIterator first, ForwardIterator last, const int64_t null_val, const bool nulls_first, const int8_t *rows_ptr, const GroupByBufferLayoutInfo &layout)
 
template<class K , class I >
void collect_order_entry_column (thrust::device_ptr< K > &d_oe_col_buffer, const int8_t *d_src_buffer, const thrust::device_ptr< I > &d_idx_first, const size_t idx_count, const size_t oe_offset, const size_t oe_stride, ThrustAllocator &allocator)
 
template<class K , class I >
void sort_indices_by_key (thrust::device_ptr< I > d_idx_first, const size_t idx_count, const thrust::device_ptr< K > &d_key_buffer, const bool desc, ThrustAllocator &allocator)
 
template<class I = int32_t>
void do_radix_sort (thrust::device_ptr< I > d_idx_first, const size_t idx_count, const int8_t *d_src_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, ThrustAllocator &allocator)
 
template<typename DerivedPolicy >
void reset_keys_in_row_buffer (const thrust::detail::execution_policy_base< DerivedPolicy > &exec, int8_t *row_buffer, const size_t key_width, const size_t row_size, const size_t first, const size_t last)
 
std::vector< int8_t > pop_n_rows_from_merged_heaps_gpu (Data_Namespace::DataMgr *data_mgr, const int64_t *dev_heaps, const size_t heaps_size, const size_t n, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t group_key_bytes, const size_t thread_count, const int device_id)
 

Function Documentation

template<class K , class I >
void collect_order_entry_column ( thrust::device_ptr< K > &  d_oe_col_buffer,
const int8_t *  d_src_buffer,
const thrust::device_ptr< I > &  d_idx_first,
const size_t  idx_count,
const size_t  oe_offset,
const size_t  oe_stride,
ThrustAllocator allocator 
)

Definition at line 138 of file TopKSort.cu.

Referenced by do_radix_sort().

144  {
145  thrust::for_each(thrust::device(allocator),
146  thrust::make_counting_iterator(size_t(0)),
147  thrust::make_counting_iterator(idx_count),
148  KeyFetcher<K, I>(thrust::raw_pointer_cast(d_oe_col_buffer),
149  d_src_buffer + oe_offset,
150  oe_stride,
151  thrust::raw_pointer_cast(d_idx_first)));
152 }

+ Here is the caller graph for this function:

template<class I = int32_t>
void do_radix_sort ( thrust::device_ptr< I >  d_idx_first,
const size_t  idx_count,
const int8_t *  d_src_buffer,
const PodOrderEntry oe,
const GroupByBufferLayoutInfo layout,
ThrustAllocator allocator 
)

Definition at line 173 of file TopKSort.cu.

References CHECK(), GroupByBufferLayoutInfo::col_bytes, GroupByBufferLayoutInfo::col_off, collect_order_entry_column(), PodOrderEntry::is_desc, GroupByBufferLayoutInfo::oe_target_info, GroupByBufferLayoutInfo::row_bytes, sort_indices_by_key(), and TargetInfo::sql_type.

Referenced by pop_n_rows_from_merged_heaps_gpu().

178  {
179  const auto& oe_type = layout.oe_target_info.sql_type;
180  if (oe_type.is_fp()) {
181  switch (layout.col_bytes) {
182  case 4: {
183  auto d_oe_buffer = get_device_ptr<float>(idx_count, allocator);
184  collect_order_entry_column(d_oe_buffer,
185  d_src_buffer,
186  d_idx_first,
187  idx_count,
188  layout.col_off,
189  layout.row_bytes,
190  allocator);
191  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
192  break;
193  }
194  case 8: {
195  auto d_oe_buffer = get_device_ptr<double>(idx_count, allocator);
196  collect_order_entry_column(d_oe_buffer,
197  d_src_buffer,
198  d_idx_first,
199  idx_count,
200  layout.col_off,
201  layout.row_bytes,
202  allocator);
203  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
204  break;
205  }
206  default:
207  CHECK(false);
208  }
209  return;
210  }
211  CHECK(oe_type.is_number() || oe_type.is_time());
212  switch (layout.col_bytes) {
213  case 4: {
214  auto d_oe_buffer = get_device_ptr<int32_t>(idx_count, allocator);
215  collect_order_entry_column(d_oe_buffer,
216  d_src_buffer,
217  d_idx_first,
218  idx_count,
219  layout.col_off,
220  layout.row_bytes,
221  allocator);
222  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
223  break;
224  }
225  case 8: {
226  auto d_oe_buffer = get_device_ptr<int64_t>(idx_count, allocator);
227  collect_order_entry_column(d_oe_buffer,
228  d_src_buffer,
229  d_idx_first,
230  idx_count,
231  layout.col_off,
232  layout.row_bytes,
233  allocator);
234  sort_indices_by_key(d_idx_first, idx_count, d_oe_buffer, oe.is_desc, allocator);
235  break;
236  }
237  default:
238  CHECK(false);
239  }
240 }
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
CHECK(cgen_state)
bool is_desc
const TargetInfo oe_target_info
void collect_order_entry_column(thrust::device_ptr< K > &d_oe_col_buffer, const int8_t *d_src_buffer, const thrust::device_ptr< I > &d_idx_first, const size_t idx_count, const size_t oe_offset, const size_t oe_stride, ThrustAllocator &allocator)
Definition: TopKSort.cu:138
void sort_indices_by_key(thrust::device_ptr< I > d_idx_first, const size_t idx_count, const thrust::device_ptr< K > &d_key_buffer, const bool desc, ThrustAllocator &allocator)
Definition: TopKSort.cu:155

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename ForwardIterator >
ForwardIterator partition_by_null ( ForwardIterator  first,
ForwardIterator  last,
const int64_t  null_val,
const bool  nulls_first,
const int8_t *  rows_ptr,
const GroupByBufferLayoutInfo layout 
)

Definition at line 72 of file TopKSort.cu.

References GroupByBufferLayoutInfo::col_bytes, GroupByBufferLayoutInfo::col_off, and GroupByBufferLayoutInfo::row_bytes.

Referenced by pop_n_rows_from_merged_heaps_gpu().

77  {
78  if (nulls_first) {
79  return (layout.col_bytes == 4)
80  ? thrust::partition(
81  first,
82  last,
84  rows_ptr + layout.col_off, layout.row_bytes, null_val))
85  : thrust::partition(
86  first,
87  last,
88  is_null_order_entry<int64_t>(
89  rows_ptr + layout.col_off, layout.row_bytes, null_val));
90  } else {
91  return (layout.col_bytes == 4)
92  ? thrust::partition(
93  first,
94  last,
95  thrust::not1(is_null_order_entry<int32_t>(
96  rows_ptr + layout.col_off, layout.row_bytes, null_val)))
97  : thrust::partition(
98  first,
99  last,
100  thrust::not1(is_null_order_entry<int64_t>(
101  rows_ptr + layout.col_off, layout.row_bytes, null_val)));
102  }
103 }
Definition: TopKSort.cu:50

+ Here is the caller graph for this function:

std::vector<int8_t> pop_n_rows_from_merged_heaps_gpu ( Data_Namespace::DataMgr data_mgr,
const int64_t *  dev_heaps,
const size_t  heaps_size,
const size_t  n,
const PodOrderEntry oe,
const GroupByBufferLayoutInfo layout,
const size_t  group_key_bytes,
const size_t  thread_count,
const int  device_id 
)

Definition at line 287 of file TopKSort.cu.

References CHECK_EQ, GroupByBufferLayoutInfo::col_bytes, do_radix_sort(), streaming_top_n::get_heap_size(), streaming_top_n::get_rows_offset_of_heaps(), null_val_bit_pattern(), PodOrderEntry::nulls_first, GroupByBufferLayoutInfo::oe_target_info, partition_by_null(), reset_keys_in_row_buffer(), GroupByBufferLayoutInfo::row_bytes, and TargetInfo::sql_type.

296  {
297  const auto row_size = layout.row_bytes;
298  CHECK_EQ(heaps_size, streaming_top_n::get_heap_size(row_size, n, thread_count));
299  const int8_t* rows_ptr = reinterpret_cast<const int8_t*>(dev_heaps) +
301  const auto total_entry_count = n * thread_count;
302  ThrustAllocator thrust_allocator(data_mgr, device_id);
303  auto d_indices = get_device_ptr<int32_t>(total_entry_count, thrust_allocator);
304  thrust::sequence(
305  thrust::device(thrust_allocator), d_indices, d_indices + total_entry_count);
306  auto separator = (group_key_bytes == 4)
307  ? thrust::partition(thrust::device(thrust_allocator),
308  d_indices,
309  d_indices + total_entry_count,
310  is_taken_entry<int32_t>(rows_ptr, row_size))
311  : thrust::partition(thrust::device(thrust_allocator),
312  d_indices,
313  d_indices + total_entry_count,
314  is_taken_entry<int64_t>(rows_ptr, row_size));
315  const size_t actual_entry_count = separator - d_indices;
316  if (!actual_entry_count) {
317  std::vector<int8_t> top_rows(row_size * n);
319  thrust::host, &top_rows[0], layout.col_bytes, row_size, 0, n);
320  return top_rows;
321  }
322 
323  const auto& oe_type = layout.oe_target_info.sql_type;
324  if (oe_type.get_notnull()) {
325  do_radix_sort(d_indices, actual_entry_count, rows_ptr, oe, layout, thrust_allocator);
326  } else {
327  auto separator = partition_by_null(d_indices,
328  d_indices + actual_entry_count,
329  null_val_bit_pattern(oe_type, false),
330  oe.nulls_first,
331  rows_ptr,
332  layout);
333  if (oe.nulls_first) {
334  const size_t null_count = separator - d_indices;
335  if (null_count < actual_entry_count) {
336  do_radix_sort(separator,
337  actual_entry_count - null_count,
338  rows_ptr,
339  oe,
340  layout,
341  thrust_allocator);
342  }
343  } else {
344  const size_t nonnull_count = separator - d_indices;
345  if (nonnull_count > 0) {
346  do_radix_sort(d_indices, nonnull_count, rows_ptr, oe, layout, thrust_allocator);
347  }
348  }
349  }
350 
351  const auto final_entry_count = std::min(n, actual_entry_count);
352  auto d_top_rows = get_device_ptr<int8_t>(row_size * n, thrust_allocator);
353  thrust::for_each(thrust::device(thrust_allocator),
354  thrust::make_counting_iterator(size_t(0)),
355  thrust::make_counting_iterator(final_entry_count),
356  RowFetcher<int32_t>(thrust::raw_pointer_cast(d_top_rows),
357  rows_ptr,
358  thrust::raw_pointer_cast(d_indices),
359  row_size));
360 
361  if (final_entry_count < n) {
362  reset_keys_in_row_buffer(thrust::device(thrust_allocator),
363  thrust::raw_pointer_cast(d_top_rows),
364  layout.col_bytes,
365  row_size,
366  final_entry_count,
367  n);
368  }
369 
370  std::vector<int8_t> top_rows(row_size * n);
371  thrust::copy(d_top_rows, d_top_rows + row_size * n, top_rows.begin());
372  return top_rows;
373 }
#define CHECK_EQ(x, y)
Definition: Logger.h:205
void reset_keys_in_row_buffer(const thrust::detail::execution_policy_base< DerivedPolicy > &exec, int8_t *row_buffer, const size_t key_width, const size_t row_size, const size_t first, const size_t last)
Definition: TopKSort.cu:260
SQLTypeInfo sql_type
Definition: TargetInfo.h:42
size_t get_rows_offset_of_heaps(const size_t n, const size_t thread_count)
Definition: TopKSort.cu:39
int64_t null_val_bit_pattern(const SQLTypeInfo &ti, const bool float_argument_input)
bool nulls_first
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
const TargetInfo oe_target_info
ForwardIterator partition_by_null(ForwardIterator first, ForwardIterator last, const int64_t null_val, const bool nulls_first, const int8_t *rows_ptr, const GroupByBufferLayoutInfo &layout)
Definition: TopKSort.cu:72
void do_radix_sort(thrust::device_ptr< I > d_idx_first, const size_t idx_count, const int8_t *d_src_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, ThrustAllocator &allocator)
Definition: TopKSort.cu:173

+ Here is the call graph for this function:

template<typename DerivedPolicy >
void reset_keys_in_row_buffer ( const thrust::detail::execution_policy_base< DerivedPolicy > &  exec,
int8_t *  row_buffer,
const size_t  key_width,
const size_t  row_size,
const size_t  first,
const size_t  last 
)

Definition at line 260 of file TopKSort.cu.

References CHECK(), EMPTY_KEY_32, and EMPTY_KEY_64.

Referenced by pop_n_rows_from_merged_heaps_gpu().

266  {
267  switch (key_width) {
268  case 4:
269  thrust::for_each(
270  exec,
271  thrust::make_counting_iterator(first),
272  thrust::make_counting_iterator(last),
273  KeyReseter<int32_t>(row_buffer, row_size, static_cast<int32_t>(EMPTY_KEY_32)));
274  break;
275  case 8:
276  thrust::for_each(
277  exec,
278  thrust::make_counting_iterator(first),
279  thrust::make_counting_iterator(last),
280  KeyReseter<int64_t>(row_buffer, row_size, static_cast<int64_t>(EMPTY_KEY_64)));
281  break;
282  default:
283  CHECK(false);
284  }
285 }
#define EMPTY_KEY_64
CHECK(cgen_state)
#define EMPTY_KEY_32

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<class K , class I >
void sort_indices_by_key ( thrust::device_ptr< I >  d_idx_first,
const size_t  idx_count,
const thrust::device_ptr< K > &  d_key_buffer,
const bool  desc,
ThrustAllocator allocator 
)

Definition at line 155 of file TopKSort.cu.

Referenced by do_radix_sort().

159  {
160  if (desc) {
161  thrust::sort_by_key(thrust::device(allocator),
162  d_key_buffer,
163  d_key_buffer + idx_count,
164  d_idx_first,
165  thrust::greater<K>());
166  } else {
167  thrust::sort_by_key(
168  thrust::device(allocator), d_key_buffer, d_key_buffer + idx_count, d_idx_first);
169  }
170 }

+ Here is the caller graph for this function: