OmniSciDB  a47db9e897
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
ResultSetSortImpl.cu File Reference
#include "BufferCompaction.h"
#include "GpuMemUtils.h"
#include "GpuRtConstants.h"
#include "ResultSetBufferAccessors.h"
#include "ResultSetSortImpl.h"
#include "SortUtils.cuh"
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include "BufferEntryUtils.h"
Include dependency graph for ResultSetSortImpl.cu:

Go to the source code of this file.

Namespaces

 anonymous_namespace{ResultSetSortImpl.cu}
 

Macros

#define FORCE_CPU_VERSION
 

Functions

template<class K , class V , class I >
std::vector< uint32_t > anonymous_namespace{ResultSetSortImpl.cu}::do_radix_sort (const ExecutorDeviceType device_type, ThrustAllocator &thrust_allocator, const int8_t *groupby_buffer, V dev_oe_col_buffer_begin, V dev_oe_col_buffer_end, I dev_idx_buff_begin, const size_t dev_idx_buff_size, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n)
 
void anonymous_namespace{ResultSetSortImpl.cu}::add_nulls (std::vector< uint32_t > &idx_buff, const std::vector< uint32_t > &null_idx_buff, const PodOrderEntry &oe)
 
template<typename T >
thrust::device_ptr< T > anonymous_namespace{ResultSetSortImpl.cu}::get_device_copy_ptr (const thrust::host_vector< T > &host_vec, ThrustAllocator &thrust_allocator)
 
template<class K >
std::vector< uint32_t > anonymous_namespace{ResultSetSortImpl.cu}::baseline_sort_fp (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const thrust::host_vector< int64_t > &oe_col_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template<class K >
std::vector< uint32_t > anonymous_namespace{ResultSetSortImpl.cu}::baseline_sort_int (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const thrust::host_vector< int64_t > &oe_col_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template<class K >
thrust::host_vector< int64_t > anonymous_namespace{ResultSetSortImpl.cu}::collect_order_entry_column (const int8_t *groupby_buffer, const GroupByBufferLayoutInfo &layout, const size_t start, const size_t step)
 
template<class K >
std::vector< uint32_t > baseline_sort (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template std::vector< uint32_t > baseline_sort< int32_t > (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template std::vector< uint32_t > baseline_sort< int64_t > (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 

Macro Definition Documentation

#define FORCE_CPU_VERSION

Definition at line 13 of file ResultSetSortImpl.cu.

Function Documentation

template<class K >
std::vector<uint32_t> baseline_sort ( const ExecutorDeviceType  device_type,
const int  device_id,
Data_Namespace::DataMgr data_mgr,
const int8_t *  groupby_buffer,
const PodOrderEntry oe,
const GroupByBufferLayoutInfo layout,
const size_t  top_n,
const size_t  start,
const size_t  step 
)

Definition at line 337 of file ResultSetSortImpl.cu.

References CHECK(), CPU, get_compact_type(), anonymous_namespace{ResultSetSortImpl.cu}::get_device_copy_ptr(), GPU, PodOrderEntry::is_desc, kAVG, and PodOrderEntry::nulls_first.

345  {
346  auto oe_col_buffer = collect_order_entry_column<K>(groupby_buffer, layout, start, step);
347  const auto& entry_ti = get_compact_type(layout.oe_target_info);
348  CHECK(entry_ti.is_number());
349  if (entry_ti.is_fp() || layout.oe_target_info.agg_kind == kAVG) {
350  return baseline_sort_fp<K>(device_type,
351  device_id,
352  data_mgr,
353  groupby_buffer,
354  oe_col_buffer,
355  oe,
356  layout,
357  top_n,
358  start,
359  step);
360  }
361  // Because of how we represent nulls for integral types, they'd be at the
362  // wrong position in these two cases. Separate them into a different buffer.
363  if ((oe.is_desc && oe.nulls_first) || (!oe.is_desc && !oe.nulls_first)) {
364  return baseline_sort_int<K>(device_type,
365  device_id,
366  data_mgr,
367  groupby_buffer,
368  oe_col_buffer,
369  oe,
370  layout,
371  top_n,
372  start,
373  step);
374  }
375  ThrustAllocator thrust_allocator(data_mgr, device_id);
376  // Fastest path, no need to separate nulls away since they'll end up at the
377  // right place as a side effect of how we're representing nulls.
378  if (device_type == ExecutorDeviceType::GPU) {
379  if (oe_col_buffer.empty()) {
380  return {};
381  }
382  const auto dev_idx_buff =
383  get_device_ptr<uint32_t>(oe_col_buffer.size(), thrust_allocator);
384  thrust::sequence(dev_idx_buff, dev_idx_buff + oe_col_buffer.size(), start, step);
385  const auto dev_oe_col_buffer = get_device_copy_ptr(oe_col_buffer, thrust_allocator);
386  return do_radix_sort<K>(device_type,
387  thrust_allocator,
388  groupby_buffer,
389  dev_oe_col_buffer,
390  dev_oe_col_buffer + oe_col_buffer.size(),
391  dev_idx_buff,
392  oe_col_buffer.size(),
393  oe,
394  layout,
395  top_n);
396  }
397  CHECK(device_type == ExecutorDeviceType::CPU);
398  thrust::host_vector<uint32_t> host_idx_buff(oe_col_buffer.size());
399  thrust::sequence(host_idx_buff.begin(), host_idx_buff.end(), start, step);
400  return do_radix_sort<K>(device_type,
401  thrust_allocator,
402  groupby_buffer,
403  oe_col_buffer.begin(),
404  oe_col_buffer.end(),
405  host_idx_buff.begin(),
406  host_idx_buff.size(),
407  oe,
408  layout,
409  top_n);
410 }
thrust::device_ptr< T > get_device_copy_ptr(const thrust::host_vector< T > &host_vec, ThrustAllocator &thrust_allocator)
bool nulls_first
const SQLTypeInfo get_compact_type(const TargetInfo &target)
CHECK(cgen_state)
bool is_desc
Definition: sqldefs.h:71

Here is the call graph for this function:

template std::vector<uint32_t> baseline_sort< int32_t > ( const ExecutorDeviceType  device_type,
const int  device_id,
Data_Namespace::DataMgr data_mgr,
const int8_t *  groupby_buffer,
const PodOrderEntry oe,
const GroupByBufferLayoutInfo layout,
const size_t  top_n,
const size_t  start,
const size_t  step 
)
template std::vector<uint32_t> baseline_sort< int64_t > ( const ExecutorDeviceType  device_type,
const int  device_id,
Data_Namespace::DataMgr data_mgr,
const int8_t *  groupby_buffer,
const PodOrderEntry oe,
const GroupByBufferLayoutInfo layout,
const size_t  top_n,
const size_t  start,
const size_t  step 
)