OmniSciDB  0264ff685a
quantile.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file quantile.h
19  * @author Matt Pulver <matt.pulver@omnisci.com>
20  * @description Calculate approximate median and general quantiles, based on
21  * "Computing Extremely Accurate Quantiles Using t-Digests" by T. Dunning et al.
22  * https://arxiv.org/abs/1902.04023
23  *
24  */
25 
26 #pragma once
27 
28 #include "DoubleSort.h"
29 #include "VectorView.h"
30 #include "gpu_enabled.h"
31 
32 #ifndef __CUDACC__
33 #include <iomanip>
34 #include <ostream>
35 #endif
36 
37 #include <limits>
38 #include <memory>
39 #include <numeric>
40 #include <type_traits>
41 
42 namespace quantile {
43 
44 namespace detail {
45 
46 template <typename RealType, typename IndexType>
47 struct Centroid {
48  RealType sum_;
49  IndexType count_;
50 };
51 
52 template <typename RealType, typename IndexType>
53 struct Centroids {
54  IndexType curr_idx_; // used during mergeCentroids algorithm
55  IndexType next_idx_; // used during mergeCentroids algorithm
56  int inc_; // 1 or -1 : forward or reverse iteration
59  static constexpr RealType infinity = std::numeric_limits<RealType>::infinity();
60  static constexpr RealType nan = std::numeric_limits<RealType>::quiet_NaN();
61  RealType max_{-infinity};
62  RealType min_{infinity};
63 
64  Centroids() = default;
65 
66  DEVICE Centroids(RealType* sums, IndexType* counts, IndexType const size)
67  : sums_(sums, size, size), counts_(counts, size, size) {}
68 
70  : sums_(sums), counts_(counts) {}
71 
72  DEVICE void appendAndSortCurrent(Centroids& buff);
73 
74  DEVICE IndexType capacity() const { return sums_.capacity(); }
75 
76  DEVICE void clear() {
77  sums_.clear();
78  counts_.clear();
79  max_ = -infinity;
80  min_ = infinity;
81  }
82 
83  DEVICE IndexType currCount() const { return counts_[curr_idx_]; }
84 
85  DEVICE RealType currMean() const { return mean(curr_idx_); }
86 
87  DEVICE bool hasCurr() const { return curr_idx_ < size(); }
88 
89  DEVICE bool hasNext() const { return next_idx_ < size(); }
90 
91  DEVICE RealType mean(IndexType const i) const { return sums_[i] / counts_[i]; }
92 
93  // Return true if centroid was merged, false if not.
94  DEVICE bool mergeIfFits(Centroids& centroid, IndexType const max_count);
95 
96  DEVICE void moveNextToCurrent();
97 
98  DEVICE IndexType nextCount() const { return counts_[next_idx_]; }
99 
100  DEVICE RealType nextSum() const { return sums_[next_idx_]; }
101 
102  // Assumes this->hasNext() and b.hasNext().
103  // Order by (nextMean(), -nextCount()).
104  DEVICE bool operator<(Centroids const& b) const {
105  // lhs < rhs is same as nextMean() < b.nextMean() without division
106  RealType const lhs = nextSum() * b.nextCount();
107  RealType const rhs = b.nextSum() * nextCount();
108  return lhs < rhs || (lhs == rhs && b.nextCount() < nextCount());
109  }
110 
111  DEVICE void push_back(RealType const value, RealType const count) {
112  sums_.push_back(value);
113  counts_.push_back(count);
114  }
115 
116  DEVICE void resetIndices(bool const forward);
117 
118  DEVICE size_t size() const { return sums_.size(); }
119 
120  DEVICE IndexType totalWeight() const {
121  return gpu_enabled::accumulate(counts_.begin(), counts_.end(), IndexType(0));
122  }
123 
124 #ifndef __CUDACC__
125  template <typename RealType2, typename IndexType2>
126  friend std::ostream& operator<<(std::ostream&, Centroids<RealType2, IndexType2> const&);
127 #endif
128 };
129 
130 template <typename RealType, typename IndexType>
135  IndexType prefix_sum_{0}; // Prefix-sum of centroid counts.
136  IndexType const total_weight_;
137  bool const forward_;
138 
139  DEVICE void mergeMinMax();
140  DEVICE void setCurrCentroid();
141 
142  public:
145  bool const forward);
146 
147  // nullptr if !hasNext()
148  DEVICE Centroids<RealType, IndexType>* getNextCentroid() const;
149 
150  DEVICE bool hasNext() const { return buf_->hasNext() || centroids_->hasNext(); }
151 
152  // Merge as many centroids as possible for count <= max_count.
153  DEVICE void merge(IndexType const max_count);
154 
155  // Assume curr_centroid_ is fully merged.
156  DEVICE void next();
157 
158  DEVICE IndexType prefixSum() const { return prefix_sum_; }
159 
160  DEVICE IndexType totalWeight() const { return total_weight_; }
161 };
162 
163 // Instantiated on host for reserving memory for Centroids.
164 template <typename RealType, typename IndexType>
166  // TODO CUDA memory containers
167  std::vector<RealType> sums_;
168  std::vector<IndexType> counts_;
169 
170  public:
171  explicit CentroidsMemory(size_t const size) : sums_(size), counts_(size) {}
172  size_t nbytes() const { return sums_.size() * (sizeof(RealType) + sizeof(IndexType)); }
173  size_t size() const { return sums_.size(); }
174  VectorView<RealType> sums() { return {sums_.data(), sums_.size(), sums_.size()}; }
176  return {counts_.data(), counts_.size(), counts_.size()};
177  }
178 };
179 
180 template <typename RealType, typename IndexType = size_t>
181 class TDigest {
184  bool forward_{true}; // alternate direction on each call to mergeCentroids().
185 
186  DEVICE RealType max() const { return centroids_.max_; }
187  DEVICE RealType min() const { return centroids_.min_; }
188 
189  DEVICE IndexType maxCardinality(IndexType const sum, IndexType const total_weight);
190 
191  // Require: centroids are sorted by (mean, -count)
192  DEVICE void mergeCentroids(Centroids<RealType, IndexType>&);
193 
194  DEVICE RealType firstCentroid(RealType const x);
195  DEVICE RealType interiorCentroid(RealType const x,
196  IndexType const idx1,
197  IndexType const prefix_sum);
198  DEVICE RealType lastCentroid(RealType const x, IndexType const N);
199  DEVICE RealType oneCentroid(RealType const x);
200 
201  DEVICE RealType slope(IndexType const idx1, IndexType const idx2);
202 
203  public:
205 
206  TDigest() = default;
207 
209  : centroids_(mem.sums().data(), mem.counts().data(), mem.size()) {
210  centroids_.clear();
211  }
212 
214 
215  // Store value to buf_, and merge when full.
216  DEVICE void add(RealType value);
217 
218  DEVICE void mergeBuffer();
219 
220  // Import from sorted memory range. Ok to change values during merge.
221  DEVICE void mergeSorted(RealType* sums, IndexType* counts, IndexType size);
222 
223  DEVICE void mergeTDigest(TDigest& t_digest) {
224  mergeBuffer();
225  t_digest.mergeBuffer();
226  mergeCentroids(t_digest.centroids_);
227  }
228 
229  // Uses buf_ as scratch space.
230  DEVICE RealType quantile(RealType const q) {
231  assert(centroids_.size() <= buf_.capacity());
232  return quantile(buf_.counts_.data(), q);
233  }
234 
235  // Uses buf as scratch space.
236  DEVICE RealType quantile(IndexType* buf, RealType const q);
237 
238  // Assumes mem is externally managed.
239  DEVICE void setBuffer(Memory& mem) {
241  mem.sums().data(), mem.counts().data(), mem.size());
242  buf_.clear();
243  }
244 
245  // Assumes mem is externally managed.
247  centroids_ = Centroids<RealType, IndexType>(
248  mem.sums().data(), mem.counts().data(), mem.size());
249  centroids_.clear();
250  }
251 
253  VectorView<IndexType> const counts) {
254  centroids_ = Centroids<RealType, IndexType>(sums, counts);
255  centroids_.clear();
256  }
257 
258  // Total number of data points in centroids_.
259  DEVICE IndexType totalWeight() const { return centroids_.totalWeight(); }
260 };
261 
262 // Class template member definitions
263 
264 // Class Centroids<>
265 
266 namespace {
267 
268 // Order by (mean, -count)
269 template <typename RealType, typename IndexType>
272  // Assume value1() is positive and use multiplication instead of division.
273  DEVICE bool operator()(Value const& a, Value const& b) const {
274  auto const lhs = a.value0() * b.value1();
275  auto const rhs = b.value0() * a.value1();
276  return lhs < rhs || (lhs == rhs && b.value1() < a.value1());
277  }
278 };
279 
280 } // namespace
281 
282 template <typename RealType, typename IndexType>
284  if (inc_ == -1 && curr_idx_ != 0) {
285  // Shift data to the left left by curr_idx_.
286  // Prefer to copy, but thrust::copy doesn't support overlapping ranges.
287  // Reverse instead, which gets sorted below.
288  // gpu_enabled::copy(sums_.begin() + curr_idx_, sums_.end(), sums_.begin());
289  // gpu_enabled::copy(counts_.begin() + curr_idx_, counts_.end(), counts_.begin());
290  gpu_enabled::reverse(sums_.begin(), sums_.end());
291  gpu_enabled::reverse(counts_.begin(), counts_.end());
292  }
293  // Shift VectorViews to the right by buff.curr_idx_.
294  IndexType const offset = inc_ == 1 ? 0 : buff.curr_idx_;
295  IndexType const buff_size =
296  inc_ == 1 ? buff.curr_idx_ + 1 : buff.size() - buff.curr_idx_;
297  VectorView<RealType> buff_sums(buff.sums_.begin() + offset, buff_size, buff_size);
298  VectorView<IndexType> buff_counts(buff.counts_.begin() + offset, buff_size, buff_size);
299  // Copy buff into sums_ and counts_.
300  IndexType const curr_size = inc_ == 1 ? curr_idx_ + 1 : size() - curr_idx_;
301  IndexType const total_size = curr_size + buff_sums.size();
302  assert(total_size <= sums_.capacity()); // TODO proof of inequality
303  sums_.resize(total_size);
304  gpu_enabled::copy(buff_sums.begin(), buff_sums.end(), sums_.begin() + curr_size);
305  assert(total_size <= counts_.capacity());
306  counts_.resize(total_size);
307  gpu_enabled::copy(buff_counts.begin(), buff_counts.end(), counts_.begin() + curr_size);
308 
309  // Sort sums_ and counts_ by (mean, -count).
310  double_sort::Iterator<RealType, IndexType> const begin(sums_.begin(), counts_.begin());
311  double_sort::Iterator<RealType, IndexType> const end(sums_.end(), counts_.end());
312  gpu_enabled::sort(begin, end, OrderByMeanAscCountDesc<RealType, IndexType>());
313 }
314 
315 // Return true if centroid was merged, false if not.
316 template <typename RealType, typename IndexType>
318  IndexType const max_count) {
319  if (counts_[curr_idx_] + centroid.nextCount() <= max_count) {
320  sums_[curr_idx_] += centroid.nextSum();
321  counts_[curr_idx_] += centroid.nextCount();
322  centroid.next_idx_ += centroid.inc_;
323  return true;
324  }
325  return false;
326 }
327 
328 template <typename RealType, typename IndexType>
330  curr_idx_ += inc_;
331  if (curr_idx_ != next_idx_) {
332  sums_[curr_idx_] = sums_[next_idx_];
333  counts_[curr_idx_] = counts_[next_idx_];
334  }
335  next_idx_ += inc_;
336 }
337 
338 template <typename RealType, typename IndexType>
340  if (forward) {
341  inc_ = 1;
342  curr_idx_ = ~IndexType(0);
343  } else {
344  inc_ = -1;
345  curr_idx_ = size();
346  }
347  static_assert(std::is_unsigned<IndexType>::value,
348  "IndexType must be an unsigned type.");
349  next_idx_ = curr_idx_ + inc_;
350 }
351 
352 #ifndef __CUDACC__
353 template <typename RealType, typename IndexType>
354 std::ostream& operator<<(std::ostream& out,
355  Centroids<RealType, IndexType> const& centroids) {
356  out << "Centroids<" << typeid(RealType).name() << ',' << typeid(IndexType).name()
357  << ">(size(" << centroids.size() << ") curr_idx_(" << centroids.curr_idx_
358  << ") next_idx_(" << centroids.next_idx_ << ") sums_(";
359  for (IndexType i = 0; i < centroids.sums_.size(); ++i) {
360  out << (i ? " " : "") << std::setprecision(20) << centroids.sums_[i];
361  }
362  out << ") counts_(";
363  for (IndexType i = 0; i < centroids.counts_.size(); ++i) {
364  out << (i ? " " : "") << centroids.counts_[i];
365  }
366  return out << "))";
367 }
368 #endif
369 
370 // Class CentroidsMerger<>
371 
372 template <typename RealType, typename IndexType>
376  bool const forward)
377  : buf_(buf)
378  , centroids_(centroids)
379  , total_weight_(centroids->totalWeight() + buf->totalWeight())
380  , forward_(forward) {
381  buf_->resetIndices(forward_);
382  centroids_->resetIndices(forward_);
383  mergeMinMax();
384  setCurrCentroid();
385 }
386 
387 template <typename RealType, typename IndexType>
390  if (buf_->hasNext() && centroids_->hasNext()) {
391  return (*buf_ < *centroids_) == forward_ ? buf_ : centroids_;
392  } else if (buf_->hasNext()) {
393  return buf_;
394  } else if (centroids_->hasNext()) {
395  return centroids_;
396  } else {
397  return nullptr; // hasNext() is false
398  }
399 }
400 
401 namespace {
402 
403 // Helper struct for mergeCentroids() for tracking skipped centroids.
404 template <typename RealType, typename IndexType>
405 class Skipped {
406  struct Data {
407  Centroids<RealType, IndexType>* centroid_{nullptr};
408  IndexType start_{};
409  IndexType count_merged_{};
410  IndexType count_skipped_{};
411  } data_[2];
413 
414  DEVICE static void shiftCentroids(Data& data) {
415  if (data.count_merged_) {
416  shiftRange(data.centroid_->sums_.begin() + data.start_,
417  data.count_skipped_,
418  data.count_merged_,
419  data.centroid_->inc_);
420  shiftRange(data.centroid_->counts_.begin() + data.start_,
421  data.count_skipped_,
422  data.count_merged_,
423  data.centroid_->inc_);
424  data.start_ += data.centroid_->inc_ * data.count_merged_;
425  }
426  }
427  template <typename T>
428  DEVICE static void shiftRange(T* const begin,
429  IndexType skipped,
430  IndexType const merged,
431  int const inc) {
432 #ifdef __CUDACC__
433  T* src = begin + inc * (skipped - 1);
434  T* dst = src + inc * merged;
435  for (; skipped; --skipped, src -= inc, dst -= inc) {
436  *dst = *src;
437  }
438 #else
439  if (inc == 1) {
440  std::copy_backward(begin, begin + skipped, begin + skipped + merged);
441  } else {
442  std::copy(begin + 1 - skipped, begin + 1, begin + 1 - skipped - merged);
443  }
444 #endif
445  }
446 
447  public:
449  return data_[0].centroid_ != centroid;
450  }
452  return mean_.sum_ * next_centroid->nextCount() !=
453  next_centroid->nextSum() * mean_.count_;
454  }
456  IndexType const idx = index(next_centroid);
457  if (idx == 1 && data_[1].centroid_ == nullptr) {
458  data_[1] = {next_centroid, next_centroid->next_idx_ + next_centroid->inc_, 0, 0};
459  } else if (data_[idx].count_skipped_) {
460  ++data_[idx].count_merged_;
461  } else {
462  assert(idx == 1);
463  data_[1].start_ += next_centroid->inc_;
464  }
465  }
466  DEVICE operator bool() const { return data_[0].centroid_; }
467  // Shift skipped centroids over merged centroids, and rewind next_idx_.
469  shiftCentroids(data_[0]);
470  data_[0].centroid_->next_idx_ = data_[0].start_;
471  if (data_[1].centroid_) {
472  shiftCentroids(data_[1]);
473  data_[1].centroid_->next_idx_ = data_[1].start_;
474  }
475  }
477  mean_ = Centroid<RealType, IndexType>{next_centroid->nextSum(),
478  next_centroid->nextCount()};
479  data_[0] = {next_centroid, next_centroid->next_idx_, 0, 1};
480  }
482  IndexType const idx = index(next_centroid);
483  if (idx == 1 && data_[1].centroid_ == nullptr) {
484  data_[1] = {next_centroid, next_centroid->next_idx_, 0, 1};
485  } else {
486  if (data_[idx].count_merged_) {
487  shiftCentroids(data_[idx]);
488  data_[idx].count_merged_ = 0;
489  }
490  ++data_[idx].count_skipped_;
491  }
492  next_centroid->next_idx_ += next_centroid->inc_;
493  }
494 };
495 
496 } // namespace
497 
498 // Merge as many centroids as possible for count <= max_count.
499 template <typename RealType, typename IndexType>
500 DEVICE void CentroidsMerger<RealType, IndexType>::merge(IndexType const max_count) {
501  Skipped<RealType, IndexType> skipped;
502  while (auto* next_centroid = getNextCentroid()) {
503  if (skipped) {
504  if (skipped.isDifferentMean(next_centroid)) {
505  break;
506  } else if (curr_centroid_->mergeIfFits(*next_centroid, max_count)) {
507  skipped.merged(next_centroid);
508  } else {
509  skipped.skipSubsequent(next_centroid);
510  }
511  } else if (!curr_centroid_->mergeIfFits(*next_centroid, max_count)) {
512  skipped.skipFirst(next_centroid);
513  }
514  }
515  if (skipped) {
516  skipped.shiftCentroidsAndSetNext();
517  }
518 }
519 
520 // Track min/max without assuming the data points exist in any particular centroids.
521 // Otherwise to assume and track their existence in the min/max centroids introduces
522 // significant complexity. For example, the min centroid may not remain the min
523 // centroid if it merges into it the 3rd smallest centroid, skipping the 2nd due to
524 // the scaling function constraint.
525 // When the quantile() is calculated, the min/max data points are assumed to exist
526 // in the min/max centroids for the purposes of calculating the quantile within those
527 // centroids.
528 template <typename RealType, typename IndexType>
530  if (centroids_->max_ < buf_->max_) {
531  centroids_->max_ = buf_->max_;
532  }
533  if (buf_->min_ < centroids_->min_) {
534  centroids_->min_ = buf_->min_;
535  }
536 }
537 
538 // Assume curr_centroid_ is fully merged.
539 template <typename RealType, typename IndexType>
541  prefix_sum_ += curr_centroid_->currCount();
542  setCurrCentroid();
543 }
544 
545 template <typename RealType, typename IndexType>
547  if ((curr_centroid_ = getNextCentroid())) {
548  curr_centroid_->moveNextToCurrent();
549  }
550 }
551 
552 // class TDigest<>
553 
554 template <typename RealType, typename IndexType>
556  if (buf_.sums_.full()) {
557  mergeBuffer();
558  }
559  buf_.sums_.push_back(value);
560  buf_.counts_.push_back(1);
561 }
562 
563 template <typename RealType, typename IndexType>
564 DEVICE IndexType
566  IndexType const total_weight) {
567  IndexType const max_bins = centroids_.capacity();
568  return max_bins < total_weight ? 2 * total_weight / max_bins : 0;
569 }
570 
571 // Assumes buf_ consists only of singletons.
572 template <typename RealType, typename IndexType>
574  if (buf_.size()) {
575  gpu_enabled::sort(buf_.sums_.begin(), buf_.sums_.end());
576  buf_.min_ = buf_.sums_.front();
577  buf_.max_ = buf_.sums_.back();
578  mergeCentroids(buf_);
579  }
580 }
581 
582 template <typename RealType, typename IndexType>
584  IndexType* counts,
585  IndexType size) {
586  if (size) {
587  if (buf_.capacity() == 0) {
588  buf_ = Centroids<RealType, IndexType>(sums, counts, size); // Set capacity and size
589  } else {
590  buf_.sums_.set(sums, size); // Does not change capacity
591  buf_.counts_.set(counts, size);
592  }
593  gpu_enabled::fill(buf_.counts_.begin(), buf_.counts_.end(), IndexType(1));
594  buf_.min_ = buf_.sums_.front();
595  buf_.max_ = buf_.sums_.back();
596  mergeCentroids(buf_);
597  }
598 }
599 
600 // Require:
601 // * buf centroids are not empty and sorted by (mean, -count)
602 // * buf.min_ and buf.max_ are correctly set.
603 // During filling stage, buf=buf_.
604 // During reduction, buf_=centroids_[i]
605 template <typename RealType, typename IndexType>
608  // Loop over sorted sequence of buf and centroids_.
609  // Some latter centroids may be merged into the current centroid, so the number
610  // of iterations is only at most equal to the number of initial centroids.
612  for (CM cm(&buf, &centroids_, forward_); cm.hasNext(); cm.next()) {
613  // cm.prefixSum() == 0 on first iteration.
614  // Max cardinality for current centroid to be fully merged based on scaling function.
615  IndexType const max_cardinality = maxCardinality(cm.prefixSum(), cm.totalWeight());
616  cm.merge(max_cardinality);
617  }
618  // Combine sorted centroids buf[0..curr_idx_] + centroids_[0..curr_idx_] if forward
619  centroids_.appendAndSortCurrent(buf);
620  buf.clear();
621  forward_ ^= true; // alternate merge direction on each call
622 }
623 
624 namespace {
625 template <typename CountsIterator>
626 DEVICE bool isSingleton(CountsIterator itr) {
627  return *itr == 1;
628 }
629 } // namespace
630 
631 // Assumes x < centroids_.counts_.front().
632 template <typename RealType, typename IndexType>
634  if (x < 1) {
635  return min();
636  } else if (centroids_.size() == 1) {
637  return oneCentroid(x);
638  } else if (centroids_.counts_.front() == 2) {
639  RealType const sum = centroids_.sums_.front();
640  return x == 1 ? 0.5 * sum : sum - min();
641  } else {
642  RealType const count = centroids_.counts_.front();
643  RealType const dx = x - RealType(0.5) * (1 + count);
644  RealType const mean = (centroids_.sums_.front() - min()) / (count - 1);
645  return mean + slope(0, 0 < dx) * dx;
646  }
647 }
648 
649 // x is between first and last centroids.
650 template <typename RealType, typename IndexType>
651 DEVICE RealType
653  IndexType const idx1,
654  IndexType const prefix_sum) {
655  if (isSingleton(centroids_.counts_.begin() + idx1)) {
656  RealType const sum1 = centroids_.sums_[idx1];
657  if (x == prefix_sum - centroids_.counts_[idx1]) {
658  if (isSingleton(centroids_.counts_.begin() + idx1 - 1)) {
659  return 0.5 * (centroids_.sums_[idx1 - 1] + sum1);
660  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
661  return 0.5 * (centroids_.sums_[idx1 - 1] - min() + sum1);
662  }
663  }
664  return sum1;
665  } else {
666  RealType const dx = x + RealType(0.5) * centroids_.counts_[idx1] - prefix_sum;
667  IndexType const idx2 = idx1 + 2 * (0 < dx) - 1;
668  return centroids_.mean(idx1) + slope(idx1, idx2) * dx;
669  }
670 }
671 
672 // Assumes N - centroids_.counts_.back() <= x < N, and there is more than 1 centroid.
673 template <typename RealType, typename IndexType>
675  IndexType const N) {
676  if (N - 1 < x) {
677  return max();
678  }
679  IndexType const idx1 = centroids_.size() - 1;
680  RealType const sum1 = centroids_.sums_[idx1];
681  IndexType const count1 = centroids_.counts_[idx1];
682  if (count1 == 1) { // => x == N - 1
683  if (isSingleton(centroids_.counts_.begin() + (idx1 - 1))) {
684  return 0.5 * (centroids_.sums_[idx1 - 1] + sum1);
685  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
686  return 0.5 * (centroids_.sums_[idx1 - 1] - min() + sum1);
687  } else {
688  return sum1;
689  }
690  } else if (count1 == 2) { // => 3 <= N
691  if (x == N - 1) {
692  return 0.5 * sum1;
693  } else if (x == N - 2) {
694  RealType const sum2 = centroids_.sums_[idx1 - 1];
695  if (isSingleton(centroids_.counts_.begin() + (idx1 - 1))) {
696  return 0.5 * (sum2 + sum1 - max());
697  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
698  return 0.5 * (sum2 - min() + sum1 - max());
699  }
700  }
701  return sum1 - max();
702  } else { // => 3 <= count1
703  RealType const dx = x + RealType(0.5) * (count1 + 1) - N;
704  RealType const mean = (sum1 - max()) / (count1 - 1);
705  return mean + slope(idx1, idx1 - (dx < 0)) * dx;
706  }
707 }
708 
709 // Assumes there is only 1 centroid, and 1 <= x < centroids_.counts_.front().
710 template <typename RealType, typename IndexType>
712  IndexType const N = centroids_.counts_.front();
713  if (N - 1 < x) { // includes case N == 1
714  return max();
715  } else if (N == 2) { // x == 1
716  return 0.5 * centroids_.sums_.front();
717  } else if (N == 3) { // 1 <= x <= 2
718  if (x == 2) {
719  return 0.5 * (centroids_.sums_.front() - min());
720  } else {
721  RealType const s = centroids_.sums_.front() - max();
722  return x == 1 ? 0.5 * s : s - min();
723  }
724  } else { // 3 < N
725  RealType const dx = x - RealType(0.5) * N;
726  RealType const mean = (centroids_.sums_.front() - (min() + max())) / (N - 2);
727  RealType const slope = 2 * (0 < dx ? max() - mean : mean - min()) / (N - 2);
728  return mean + slope * dx;
729  }
730 }
731 
732 // No need to calculate entire partial_sum unless multiple calls to quantile() are made.
733 template <typename RealType, typename IndexType>
734 DEVICE RealType TDigest<RealType, IndexType>::quantile(IndexType* buf, RealType const q) {
735  if (centroids_.size()) {
738  centroids_.counts_.begin(), centroids_.counts_.end(), partial_sum.begin());
739  IndexType const N = partial_sum.back();
740  RealType const x = q * N;
741  auto const it1 = gpu_enabled::upper_bound(partial_sum.begin(), partial_sum.end(), x);
742  if (it1 == partial_sum.begin()) {
743  return firstCentroid(x);
744  } else if (it1 == partial_sum.end()) { // <==> 1 <= q
745  return max();
746  } else if (it1 + 1 == partial_sum.end()) {
747  return lastCentroid(x, N);
748  } else {
749  return interiorCentroid(x, it1 - partial_sum.begin(), *it1);
750  }
751  } else {
752  return centroids_.nan;
753  }
754 }
755 
756 // Requirement: 1 < M and 0 <= idx1, idx2 < M where M = Number of centroids.
757 // Return slope of line segment connecting idx1 and idx2.
758 // If equal then assume it is the section contained within an extrema.
759 // Centroid for idx1 is not a singleton, but idx2 may be.
760 template <typename RealType, typename IndexType>
761 DEVICE RealType TDigest<RealType, IndexType>::slope(IndexType idx1, IndexType idx2) {
762  IndexType const M = centroids_.size();
763  if (idx1 == idx2) { // Line segment is contained in either the first or last centroid.
764  RealType const n = static_cast<RealType>(centroids_.counts_[idx1]);
765  RealType const s = centroids_.sums_[idx1];
766  return idx1 == 0 ? 2 * (s - n * min()) / ((n - 1) * (n - 1))
767  : 2 * (n * max() - s) / ((n - 1) * (n - 1));
768  } else {
769  bool const min1 = idx1 == 0; // idx1 is the min centroid
770  bool const max1 = idx1 == M - 1; // idx1 is the max centroid
771  bool const min2 = idx2 == 0; // idx2 is the min centroid
772  bool const max2 = idx2 == M - 1; // idx2 is the max centroid
773  RealType const n1 = static_cast<RealType>(centroids_.counts_[idx1] - min1 - max1);
774  RealType const s1 = centroids_.sums_[idx1] - (min1 ? min() : max1 ? max() : 0);
775  RealType const s2 = centroids_.sums_[idx2] - (min2 ? min() : max2 ? max() : 0);
776  if (isSingleton(centroids_.counts_.begin() + idx2)) {
777  return (idx1 < idx2 ? 2 : -2) * (n1 * s2 - s1) / (n1 * n1);
778  } else {
779  RealType const n2 = static_cast<RealType>(centroids_.counts_[idx2] - min2 - max2);
780  return (idx1 < idx2 ? 2 : -2) * (n1 * s2 - n2 * s1) / (n1 * n2 * (n1 + n2));
781  }
782  }
783 }
784 
785 } // namespace detail
786 
788 
789 } // namespace quantile
DEVICE void push_back(RealType const value, RealType const count)
Definition: quantile.h:111
DEVICE void push_back(T const &value)
Definition: VectorView.h:73
std::vector< RealType > sums_
Definition: quantile.h:167
Centroids< RealType, IndexType > * buf_
Definition: quantile.h:132
DEVICE void skipSubsequent(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:481
std::ostream & operator<<(std::ostream &out, Centroids< RealType, IndexType > const &centroids)
Definition: quantile.h:354
DEVICE void sort(ARGS &&... args)
Definition: gpu_enabled.h:105
DEVICE RealType quantile(RealType const q)
Definition: quantile.h:230
DEVICE IndexType capacity() const
Definition: quantile.h:74
DEVICE void fill(ARGS &&... args)
Definition: gpu_enabled.h:60
CentroidsMemory(size_t const size)
Definition: quantile.h:171
DEVICE void resetIndices(bool const forward)
Definition: quantile.h:339
DEVICE Centroids< RealType, IndexType > & centroids()
Definition: quantile.h:213
DEVICE IndexType totalWeight() const
Definition: quantile.h:160
DEVICE void moveNextToCurrent()
Definition: quantile.h:329
DEVICE void add(RealType value)
Definition: quantile.h:555
DEVICE RealType nextSum() const
Definition: quantile.h:100
DEVICE void merged(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:455
DEVICE bool mergeIfFits(Centroids &centroid, IndexType const max_count)
Definition: quantile.h:317
DEVICE TDigest(Memory &mem)
Definition: quantile.h:208
DEVICE void mergeBuffer()
Definition: quantile.h:573
DEVICE RealType max() const
Definition: quantile.h:186
DEVICE void partial_sum(ARGS &&... args)
Definition: gpu_enabled.h:87
VectorView< RealType > sums()
Definition: quantile.h:174
DEVICE T * data()
Definition: VectorView.h:64
VectorView< RealType > sums_
Definition: quantile.h:57
VectorView< IndexType > counts_
Definition: quantile.h:58
name
Definition: setup.py:35
DEVICE IndexType maxCardinality(IndexType const sum, IndexType const total_weight)
Definition: quantile.h:565
std::vector< IndexType > counts_
Definition: quantile.h:168
DEVICE void clear()
Definition: VectorView.h:63
DEVICE RealType lastCentroid(RealType const x, IndexType const N)
Definition: quantile.h:674
#define DEVICE
DEVICE IndexType currCount() const
Definition: quantile.h:83
DEVICE auto copy(ARGS &&... args)
Definition: gpu_enabled.h:51
DEVICE void setCentroids(Memory &mem)
Definition: quantile.h:246
DEVICE T1 value1() const
Definition: DoubleSort.h:88
DEVICE Centroids< RealType, IndexType > * getNextCentroid() const
Definition: quantile.h:389
DEVICE RealType oneCentroid(RealType const x)
Definition: quantile.h:711
DEVICE IndexType prefixSum() const
Definition: quantile.h:158
Centroids< RealType, IndexType > * centroids_
Definition: quantile.h:133
DEVICE size_type size() const
Definition: VectorView.h:83
DEVICE RealType firstCentroid(RealType const x)
Definition: quantile.h:633
DEVICE T0 value0() const
Definition: DoubleSort.h:87
DEVICE auto upper_bound(ARGS &&... args)
Definition: gpu_enabled.h:123
DEVICE Centroids(VectorView< RealType > sums, VectorView< IndexType > counts)
Definition: quantile.h:69
DEVICE CentroidsMerger(Centroids< RealType, IndexType > *buf, Centroids< RealType, IndexType > *centroids, bool const forward)
Definition: quantile.h:373
DEVICE void mergeTDigest(TDigest &t_digest)
Definition: quantile.h:223
DEVICE void setCentroids(VectorView< RealType > const sums, VectorView< IndexType > const counts)
Definition: quantile.h:252
DEVICE RealType currMean() const
Definition: quantile.h:85
DEVICE RealType interiorCentroid(RealType const x, IndexType const idx1, IndexType const prefix_sum)
Definition: quantile.h:652
Centroids< RealType, IndexType > buf_
Definition: quantile.h:183
DEVICE void skipFirst(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:476
static DEVICE void shiftRange(T *const begin, IndexType skipped, IndexType const merged, int const inc)
Definition: quantile.h:428
DEVICE RealType mean(IndexType const i) const
Definition: quantile.h:91
DEVICE void setBuffer(Memory &mem)
Definition: quantile.h:239
DEVICE IndexType totalWeight() const
Definition: quantile.h:120
DEVICE void reverse(ARGS &&... args)
Definition: gpu_enabled.h:96
DEVICE T * begin() const
Definition: VectorView.h:59
DEVICE T * end() const
Definition: VectorView.h:67
DEVICE Centroids(RealType *sums, IndexType *counts, IndexType const size)
Definition: quantile.h:66
DEVICE IndexType totalWeight() const
Definition: quantile.h:259
DEVICE void merge(IndexType const max_count)
Definition: quantile.h:500
DEVICE bool hasNext() const
Definition: quantile.h:150
DEVICE bool isDifferentMean(Centroids< RealType, IndexType > *next_centroid) const
Definition: quantile.h:451
DEVICE IndexType nextCount() const
Definition: quantile.h:98
DEVICE bool index(Centroids< RealType, IndexType > *centroid) const
Definition: quantile.h:448
Centroids< RealType, IndexType > * curr_centroid_
Definition: quantile.h:134
DEVICE RealType slope(IndexType const idx1, IndexType const idx2)
Definition: quantile.h:761
detail::TDigest< double, size_t > TDigest
Definition: quantile.h:787
DEVICE bool hasNext() const
Definition: quantile.h:89
DEVICE size_type capacity() const
Definition: VectorView.h:60
DEVICE bool hasCurr() const
Definition: quantile.h:87
DEVICE size_t size() const
Definition: quantile.h:118
DEVICE void mergeCentroids(Centroids< RealType, IndexType > &)
Definition: quantile.h:606
DEVICE bool operator()(Value const &a, Value const &b) const
Definition: quantile.h:273
DEVICE bool operator<(Centroids const &b) const
Definition: quantile.h:104
DEVICE RealType min() const
Definition: quantile.h:187
DEVICE auto accumulate(ARGS &&... args)
Definition: gpu_enabled.h:42
DEVICE bool isSingleton(CountsIterator itr)
Definition: quantile.h:626
DEVICE void appendAndSortCurrent(Centroids &buff)
Definition: quantile.h:283
DEVICE void mergeSorted(RealType *sums, IndexType *counts, IndexType size)
Definition: quantile.h:583
VectorView< IndexType > counts()
Definition: quantile.h:175
Centroids< RealType, IndexType > centroids_
Definition: quantile.h:182