OmniSciDB  04ee39c94c
InPlaceSort.cpp File Reference
#include "InPlaceSort.h"
#include "InPlaceSortImpl.h"
#include <Analyzer/Analyzer.h>
#include "Allocators/ThrustAllocator.h"
#include "Descriptors/QueryMemoryDescriptor.h"
#include "Shared/Logger.h"
#include <cstdint>
+ Include dependency graph for InPlaceSort.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{InPlaceSort.cpp}
 

Functions

void sort_groups_cpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
 
void apply_permutation_cpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)
 
void anonymous_namespace{InPlaceSort.cpp}::sort_groups_gpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
 
void anonymous_namespace{InPlaceSort.cpp}::apply_permutation_gpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc)
 
void inplace_sort_gpu (const std::list< Analyzer::OrderEntry > &order_entries, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &group_by_buffers, Data_Namespace::DataMgr *data_mgr, const int device_id)
 

Function Documentation

◆ apply_permutation_cpu()

void apply_permutation_cpu ( int64_t *  val_buff,
int32_t *  idx_buff,
const uint64_t  entry_count,
int64_t *  tmp_buff,
const uint32_t  chosen_bytes 
)

Definition at line 46 of file InPlaceSort.cpp.

References apply_permutation_on_cpu(), and CHECK.

Referenced by ResultSet::radixSortOnCpu().

50  {
51 #ifdef HAVE_CUDA
52  switch (chosen_bytes) {
53  case 1:
54  case 2:
55  case 4:
56  case 8:
57  apply_permutation_on_cpu(val_buff, idx_buff, entry_count, tmp_buff, chosen_bytes);
58  break;
59  default:
60  CHECK(false);
61  }
62 #endif
63 }
void apply_permutation_on_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ inplace_sort_gpu()

void inplace_sort_gpu ( const std::list< Analyzer::OrderEntry > &  order_entries,
const QueryMemoryDescriptor &  query_mem_desc,
const GpuGroupByBuffers &  group_by_buffers,
Data_Namespace::DataMgr *  data_mgr,
const int  device_id 
)

Definition at line 108 of file InPlaceSort.cpp.

References align_to_int64(), anonymous_namespace{InPlaceSort.cpp}::apply_permutation_gpu(), CHECK_EQ, QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), QueryMemoryDescriptor::hasKeylessHash(), GpuGroupByBuffers::second, and anonymous_namespace{InPlaceSort.cpp}::sort_groups_gpu().

Referenced by QueryExecutionContext::launchGpuCode(), and ResultSet::radixSortOnGpu().

112  {
113  ThrustAllocator alloc(data_mgr, device_id);
114  CHECK_EQ(size_t(1), order_entries.size());
115  const auto idx_buff = group_by_buffers.second -
116  align_to_int64(query_mem_desc.getEntryCount() * sizeof(int32_t));
117  for (const auto& order_entry : order_entries) {
118  const auto target_idx = order_entry.tle_no - 1;
119  const auto val_buff =
120  group_by_buffers.second + query_mem_desc.getColOffInBytes(target_idx);
121  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
122  sort_groups_gpu(reinterpret_cast<int64_t*>(val_buff),
123  reinterpret_cast<int32_t*>(idx_buff),
124  query_mem_desc.getEntryCount(),
125  order_entry.is_desc,
126  chosen_bytes,
127  alloc);
128  if (!query_mem_desc.hasKeylessHash()) {
129  apply_permutation_gpu(reinterpret_cast<int64_t*>(group_by_buffers.second),
130  reinterpret_cast<int32_t*>(idx_buff),
131  query_mem_desc.getEntryCount(),
132  sizeof(int64_t),
133  alloc);
134  }
135  for (size_t target_idx = 0; target_idx < query_mem_desc.getSlotCount();
136  ++target_idx) {
137  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
138  continue;
139  }
140  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
141  const auto val_buff =
142  group_by_buffers.second + query_mem_desc.getColOffInBytes(target_idx);
143  apply_permutation_gpu(reinterpret_cast<int64_t*>(val_buff),
144  reinterpret_cast<int32_t*>(idx_buff),
145  query_mem_desc.getEntryCount(),
146  chosen_bytes,
147  alloc);
148  }
149  }
150 }
#define CHECK_EQ(x, y)
Definition: Logger.h:195
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
void sort_groups_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
Definition: InPlaceSort.cpp:67
CUdeviceptr second
Definition: GpuMemUtils.h:61
void apply_permutation_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc)
Definition: InPlaceSort.cpp:87
size_t getColOffInBytes(const size_t col_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
+ Here is the call graph for this function:
+ Here is the caller graph for this function:

◆ sort_groups_cpu()

void sort_groups_cpu ( int64_t *  val_buff,
int32_t *  idx_buff,
const uint64_t  entry_count,
const bool  desc,
const uint32_t  chosen_bytes 
)

Definition at line 27 of file InPlaceSort.cpp.

References CHECK, and sort_on_cpu().

Referenced by ResultSet::radixSortOnCpu().

31  {
32 #ifdef HAVE_CUDA
33  switch (chosen_bytes) {
34  case 1:
35  case 2:
36  case 4:
37  case 8:
38  sort_on_cpu(val_buff, idx_buff, entry_count, desc, chosen_bytes);
39  break;
40  default:
41  CHECK(false);
42  }
43 #endif
44 }
void sort_on_cpu(int64_t *val_buff, int32_t *key_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
#define CHECK(condition)
Definition: Logger.h:187
+ Here is the call graph for this function:
+ Here is the caller graph for this function: