OmniSciDB  04ee39c94c
InPlaceSort.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2017 MapD Technologies, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "InPlaceSort.h"
18 #include "InPlaceSortImpl.h"
19 
20 #include <Analyzer/Analyzer.h>
23 #include "Shared/Logger.h"
24 
25 #include <cstdint>
26 
/// Sorts one group-by column on the host, producing the matching row
/// permutation in idx_buff. Supported element widths are 1/2/4/8 bytes;
/// any other width is a fatal error.
/// NOTE(review): the whole body is compiled only under HAVE_CUDA —
/// presumably because the implementation lives in the CUDA-enabled
/// translation unit; without CUDA this is a no-op.
void sort_groups_cpu(int64_t* val_buff,
                     int32_t* idx_buff,
                     const uint64_t entry_count,
                     const bool desc,
                     const uint32_t chosen_bytes) {
#ifdef HAVE_CUDA
  const bool width_supported =
      chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 || chosen_bytes == 8;
  CHECK(width_supported);
  sort_on_cpu(val_buff, idx_buff, entry_count, desc, chosen_bytes);
#endif
}
45 
/// Reorders one column on the host according to the permutation in
/// idx_buff, using tmp_buff as scratch space. Supported element widths
/// are 1/2/4/8 bytes; any other width is a fatal error.
/// NOTE(review): body is compiled only under HAVE_CUDA (implementation
/// lives in the CUDA-enabled translation unit); a no-op otherwise.
void apply_permutation_cpu(int64_t* val_buff,
                           int32_t* idx_buff,
                           const uint64_t entry_count,
                           int64_t* tmp_buff,
                           const uint32_t chosen_bytes) {
#ifdef HAVE_CUDA
  const bool width_supported =
      chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 || chosen_bytes == 8;
  CHECK(width_supported);
  apply_permutation_on_cpu(val_buff, idx_buff, entry_count, tmp_buff, chosen_bytes);
#endif
}
64 
65 namespace {
66 
67 void sort_groups_gpu(int64_t* val_buff,
68  int32_t* idx_buff,
69  const uint64_t entry_count,
70  const bool desc,
71  const uint32_t chosen_bytes,
72  ThrustAllocator& alloc) {
73 #ifdef HAVE_CUDA
74  switch (chosen_bytes) {
75  case 1:
76  case 2:
77  case 4:
78  case 8:
79  sort_on_gpu(val_buff, idx_buff, entry_count, desc, chosen_bytes, alloc);
80  break;
81  default:
82  CHECK(false);
83  }
84 #endif
85 }
86 
87 void apply_permutation_gpu(int64_t* val_buff,
88  int32_t* idx_buff,
89  const uint64_t entry_count,
90  const uint32_t chosen_bytes,
91  ThrustAllocator& alloc) {
92 #ifdef HAVE_CUDA
93  switch (chosen_bytes) {
94  case 1:
95  case 2:
96  case 4:
97  case 8:
98  apply_permutation_on_gpu(val_buff, idx_buff, entry_count, chosen_bytes, alloc);
99  break;
100  default:
101  CHECK(false);
102  }
103 #endif
104 }
105 
106 } // namespace
107 
108 void inplace_sort_gpu(const std::list<Analyzer::OrderEntry>& order_entries,
109  const QueryMemoryDescriptor& query_mem_desc,
110  const GpuGroupByBuffers& group_by_buffers,
111  Data_Namespace::DataMgr* data_mgr,
112  const int device_id) {
113  ThrustAllocator alloc(data_mgr, device_id);
114  CHECK_EQ(size_t(1), order_entries.size());
115  const auto idx_buff = group_by_buffers.second -
116  align_to_int64(query_mem_desc.getEntryCount() * sizeof(int32_t));
117  for (const auto& order_entry : order_entries) {
118  const auto target_idx = order_entry.tle_no - 1;
119  const auto val_buff =
120  group_by_buffers.second + query_mem_desc.getColOffInBytes(target_idx);
121  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
122  sort_groups_gpu(reinterpret_cast<int64_t*>(val_buff),
123  reinterpret_cast<int32_t*>(idx_buff),
124  query_mem_desc.getEntryCount(),
125  order_entry.is_desc,
126  chosen_bytes,
127  alloc);
128  if (!query_mem_desc.hasKeylessHash()) {
129  apply_permutation_gpu(reinterpret_cast<int64_t*>(group_by_buffers.second),
130  reinterpret_cast<int32_t*>(idx_buff),
131  query_mem_desc.getEntryCount(),
132  sizeof(int64_t),
133  alloc);
134  }
135  for (size_t target_idx = 0; target_idx < query_mem_desc.getSlotCount();
136  ++target_idx) {
137  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
138  continue;
139  }
140  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
141  const auto val_buff =
142  group_by_buffers.second + query_mem_desc.getColOffInBytes(target_idx);
143  apply_permutation_gpu(reinterpret_cast<int64_t*>(val_buff),
144  reinterpret_cast<int32_t*>(idx_buff),
145  query_mem_desc.getEntryCount(),
146  chosen_bytes,
147  alloc);
148  }
149  }
150 }
Defines data structures for the semantic analysis phase of query processing.
#define CHECK_EQ(x, y)
Definition: Logger.h:195
void sort_groups_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
Definition: InPlaceSort.cpp:27
void apply_permutation_on_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)
void inplace_sort_gpu(const std::list< Analyzer::OrderEntry > &order_entries, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &group_by_buffers, Data_Namespace::DataMgr *data_mgr, const int device_id)
void apply_permutation_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)
Definition: InPlaceSort.cpp:46
void apply_permutation_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
void sort_groups_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)
Definition: InPlaceSort.cpp:67
CUdeviceptr second
Definition: GpuMemUtils.h:61
void apply_permutation_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc)
Definition: InPlaceSort.cpp:87
void sort_on_cpu(int64_t *val_buff, int32_t *key_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
Descriptor for the result set buffer layout.
#define CHECK(condition)
Definition: Logger.h:187
size_t getColOffInBytes(const size_t col_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
void sort_on_gpu(int64_t *val_buff, int32_t *key_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc)