OmniSciDB d2f719934e
ResultSetSort.cpp
/*
 * Copyright 2017 MapD Technologies, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifdef HAVE_CUDA
#include "Execute.h"
#include "ResultSet.h"
#include "ResultSetSortImpl.h"

#include "../Shared/thread_count.h"

#include <future>

std::unique_ptr<CudaMgr_Namespace::CudaMgr> g_cuda_mgr;  // for unit tests only

namespace {

void set_cuda_context(Data_Namespace::DataMgr* data_mgr, const int device_id) {
  if (data_mgr) {
    data_mgr->getCudaMgr()->setContext(device_id);
    return;
  }
  // for unit tests only
  CHECK(g_cuda_mgr);
  g_cuda_mgr->setContext(device_id);
}

}  // namespace

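// Top-n sort for a single ORDER BY entry over the group-by buffer. Work is split
// into `step` strides (one per GPU device or CPU thread): each stride is sorted
// independently with baseline_sort, and the concatenated per-stride winners are
// reduced to the final top_n permutation by topPermutation().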
void ResultSet::doBaselineSort(const ExecutorDeviceType device_type,
                               const std::list<Analyzer::OrderEntry>& order_entries,
                               const size_t top_n,
                               const Executor* executor) {
  CHECK_EQ(size_t(1), order_entries.size());
  const auto& oe = order_entries.front();
  CHECK_GT(oe.tle_no, 0);
  CHECK_LE(static_cast<size_t>(oe.tle_no), targets_.size());
  size_t logical_slot_idx = 0;
  size_t physical_slot_off = 0;
  for (size_t i = 0; i < static_cast<size_t>(oe.tle_no - 1); ++i) {
    physical_slot_off += query_mem_desc_.getPaddedSlotWidthBytes(logical_slot_idx);
    logical_slot_idx =
        advance_slot(logical_slot_idx, targets_[i], separate_varlen_storage_valid_);
  }
  const auto col_off =
      get_slot_off_quad(query_mem_desc_) * sizeof(int64_t) + physical_slot_off;
  const size_t col_bytes = query_mem_desc_.getPaddedSlotWidthBytes(logical_slot_idx);
  const auto row_bytes = get_row_bytes(query_mem_desc_);
  const auto target_groupby_indices_sz = query_mem_desc_.targetGroupbyIndicesSize();
  CHECK(target_groupby_indices_sz == 0 ||
        static_cast<size_t>(oe.tle_no) <= target_groupby_indices_sz);
  const int64_t target_groupby_index{
      target_groupby_indices_sz == 0
          ? -1
          : query_mem_desc_.getTargetGroupbyIndex(oe.tle_no - 1)};
  const GroupByBufferLayoutInfo layout{query_mem_desc_.getEntryCount(),
                                       col_off,
                                       col_bytes,
                                       row_bytes,
                                       targets_[oe.tle_no - 1],
                                       target_groupby_index};
  PodOrderEntry pod_oe{oe.tle_no, oe.is_desc, oe.nulls_first};
  auto groupby_buffer = storage_->getUnderlyingBuffer();
  auto data_mgr = getDataManager();
  const auto step = static_cast<size_t>(
      device_type == ExecutorDeviceType::GPU ? getGpuCount() : cpu_threads());
  CHECK_GE(step, size_t(1));
  const auto key_bytewidth = query_mem_desc_.getEffectiveKeyWidth();
  if (step > 1) {
    std::vector<std::future<void>> top_futures;
    std::vector<Permutation> strided_permutations(step);
    for (size_t start = 0; start < step; ++start) {
      top_futures.emplace_back(std::async(
          std::launch::async,
          [&strided_permutations,
           data_mgr,
           device_type,
           groupby_buffer,
           pod_oe,
           key_bytewidth,
           layout,
           top_n,
           start,
           step] {
            if (device_type == ExecutorDeviceType::GPU) {
              set_cuda_context(data_mgr, start);
            }
            strided_permutations[start] = (key_bytewidth == 4)
                                              ? baseline_sort<int32_t>(device_type,
                                                                       start,
                                                                       data_mgr,
                                                                       groupby_buffer,
                                                                       pod_oe,
                                                                       layout,
                                                                       top_n,
                                                                       start,
                                                                       step)
                                              : baseline_sort<int64_t>(device_type,
                                                                       start,
                                                                       data_mgr,
                                                                       groupby_buffer,
                                                                       pod_oe,
                                                                       layout,
                                                                       top_n,
                                                                       start,
                                                                       step);
          }));
    }
    for (auto& top_future : top_futures) {
      top_future.wait();
    }
    for (auto& top_future : top_futures) {
      top_future.get();
    }
    permutation_.reserve(strided_permutations.size() * top_n);
    for (const auto& strided_permutation : strided_permutations) {
      permutation_.insert(
          permutation_.end(), strided_permutation.begin(), strided_permutation.end());
    }
    auto pv = PermutationView(permutation_.data(), permutation_.size());
    topPermutation(pv, top_n, createComparator(order_entries, pv, executor, false));
    if (top_n < permutation_.size()) {
      permutation_.resize(top_n);
      permutation_.shrink_to_fit();
    }
    return;
  } else {
    permutation_ =
        (key_bytewidth == 4)
            ? baseline_sort<int32_t>(
                  device_type, 0, data_mgr, groupby_buffer, pod_oe, layout, top_n, 0, 1)
            : baseline_sort<int64_t>(
                  device_type, 0, data_mgr, groupby_buffer, pod_oe, layout, top_n, 0, 1);
  }
}

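// The fast baseline sort path applies only to a single ORDER BY entry over a
// numeric, non-distinct target, and only when the group-by buffer materializes
// its keys (i.e. the layout is not keyless).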
bool ResultSet::canUseFastBaselineSort(
    const std::list<Analyzer::OrderEntry>& order_entries,
    const size_t top_n) {
  if (order_entries.size() != 1 || query_mem_desc_.hasKeylessHash() ||
      query_mem_desc_.sortOnGpu()) {
    return false;
  }
  const auto& order_entry = order_entries.front();
  CHECK_GE(order_entry.tle_no, 1);
  CHECK_LE(static_cast<size_t>(order_entry.tle_no), targets_.size());
  const auto& target_info = targets_[order_entry.tle_no - 1];
  if (!target_info.sql_type.is_number() || is_distinct_target(target_info)) {
    return false;
  }
  return (query_mem_desc_.getQueryDescriptionType() ==
              QueryDescriptionType::GroupByBaselineHash ||
          query_mem_desc_.isSingleColumnGroupByWithPerfectHash()) &&
         query_mem_desc_.getEntryCount() > top_n;
}

Data_Namespace::DataMgr* ResultSet::getDataManager() const {
  if (catalog_) {
    return &catalog_->getDataMgr();
  }
  return nullptr;
}

int ResultSet::getGpuCount() const {
  const auto data_mgr = getDataManager();
  if (!data_mgr) {
    return g_cuda_mgr ? g_cuda_mgr->getDeviceCount() : 0;
  }
  return data_mgr->gpusPresent() ? data_mgr->getCudaMgr()->getDeviceCount() : 0;
}
#endif  // HAVE_CUDA
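
For orientation, the multi-device branch of doBaselineSort above is a strided top-n reduction: each worker sorts only the rows whose index is congruent to its id modulo step, and topPermutation() then reduces the concatenated per-stride winners. The standalone sketch below illustrates the same idea with hypothetical names (strided_top_n, a plain keys vector standing in for the group-by buffer) and none of OmniSciDB's buffer-layout or GPU handling; it is not part of the file above.

// Standalone sketch of the strided top-n strategy; requires step >= 1.
#include <algorithm>
#include <cstdint>
#include <future>
#include <vector>

std::vector<uint32_t> strided_top_n(const std::vector<int64_t>& keys,
                                    const size_t top_n,
                                    const size_t step) {
  std::vector<std::vector<uint32_t>> strided(step);
  std::vector<std::future<void>> workers;
  const auto less = [&keys](const uint32_t a, const uint32_t b) {
    return keys[a] < keys[b];
  };
  for (size_t start = 0; start < step; ++start) {
    workers.emplace_back(std::async(std::launch::async, [&, start] {
      // Collect the indices owned by this stride: start, start + step, ...
      for (size_t i = start; i < keys.size(); i += step) {
        strided[start].push_back(static_cast<uint32_t>(i));
      }
      // Keep only this stride's top_n candidates.
      auto& perm = strided[start];
      const auto mid = perm.begin() + std::min(top_n, perm.size());
      std::partial_sort(perm.begin(), mid, perm.end(), less);
      perm.resize(std::min(top_n, perm.size()));
    }));
  }
  for (auto& w : workers) {
    w.get();
  }
  // Merge the per-stride winners and take the global top_n, mirroring the
  // topPermutation() call on the concatenated strided permutations.
  std::vector<uint32_t> result;
  for (const auto& perm : strided) {
    result.insert(result.end(), perm.begin(), perm.end());
  }
  const auto mid = result.begin() + std::min(top_n, result.size());
  std::partial_sort(result.begin(), mid, result.end(), less);
  result.resize(std::min(top_n, result.size()));
  return result;
}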