OmniSciDB  85c2d10cdc
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
quantile.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file quantile.h
19  * @author Matt Pulver <matt.pulver@omnisci.com>
20  * @description Calculate approximate median and general quantiles, based on
21  * "Computing Extremely Accurate Quantiles Using t-Digests" by T. Dunning et al.
22  * https://arxiv.org/abs/1902.04023
23  *
24  */
25 
26 #pragma once
27 
28 #include "DoubleSort.h"
29 #include "SimpleAllocator.h"
30 #include "VectorView.h"
31 #include "gpu_enabled.h"
32 
33 #ifndef __CUDACC__
34 #include <iomanip>
35 #include <ostream>
36 #endif
37 
38 #include <limits>
39 #include <memory>
40 #include <numeric>
41 #include <type_traits>
42 
43 namespace quantile {
44 
45 namespace detail {
46 
47 template <typename RealType, typename IndexType>
48 struct Centroid {
49  RealType sum_;
50  IndexType count_;
51 };
52 
53 template <typename RealType, typename IndexType>
54 struct Centroids {
55  IndexType curr_idx_; // used during mergeCentroids algorithm
56  IndexType next_idx_; // used during mergeCentroids algorithm
57  int inc_; // 1 or -1 : forward or reverse iteration
60  static constexpr RealType infinity = std::numeric_limits<RealType>::infinity();
61  static constexpr RealType nan = std::numeric_limits<RealType>::quiet_NaN();
62  RealType max_{-infinity};
63  RealType min_{infinity};
64 
65  Centroids() = default;
66 
67  DEVICE Centroids(RealType* sums, IndexType* counts, IndexType const size)
68  : sums_(sums, size, size), counts_(counts, size, size) {}
69 
71  : sums_(sums), counts_(counts) {}
72 
74 
75  DEVICE IndexType capacity() const { return sums_.capacity(); }
76 
77  DEVICE void clear() {
78  sums_.clear();
79  counts_.clear();
80  max_ = -infinity;
81  min_ = infinity;
82  }
83 
84  DEVICE IndexType currCount() const { return counts_[curr_idx_]; }
85 
86  DEVICE RealType currMean() const { return mean(curr_idx_); }
87 
88  DEVICE bool hasCurr() const { return curr_idx_ < size(); }
89 
90  DEVICE bool hasNext() const { return next_idx_ < size(); }
91 
92  DEVICE RealType mean(IndexType const i) const { return sums_[i] / counts_[i]; }
93 
94  // Return true if centroid was merged, false if not.
95  DEVICE bool mergeIfFits(Centroids& centroid, IndexType const max_count);
96 
98 
99  DEVICE IndexType nextCount() const { return counts_[next_idx_]; }
100 
101  DEVICE RealType nextSum() const { return sums_[next_idx_]; }
102 
103  // Assumes this->hasNext() and b.hasNext().
104  // Order by (nextMean(), -nextCount()).
105  DEVICE bool operator<(Centroids const& b) const {
106  // lhs < rhs is same as nextMean() < b.nextMean() without division
107  RealType const lhs = nextSum() * b.nextCount();
108  RealType const rhs = b.nextSum() * nextCount();
109  return lhs < rhs || (lhs == rhs && b.nextCount() < nextCount());
110  }
111 
112  DEVICE void push_back(RealType const value, RealType const count) {
113  sums_.push_back(value);
114  counts_.push_back(count);
115  }
116 
117  DEVICE void resetIndices(bool const forward);
118 
119  DEVICE size_t size() const { return sums_.size(); }
120 
121  DEVICE IndexType totalWeight() const {
122  return gpu_enabled::accumulate(counts_.begin(), counts_.end(), IndexType(0));
123  }
124 
125 #ifndef __CUDACC__
126  template <typename RealType2, typename IndexType2>
127  friend std::ostream& operator<<(std::ostream&, Centroids<RealType2, IndexType2> const&);
128 #endif
129 };
130 
131 template <typename RealType, typename IndexType>
136  IndexType prefix_sum_{0}; // Prefix-sum of centroid counts.
137  IndexType const total_weight_;
138  bool const forward_;
139 
140  DEVICE void mergeMinMax();
141  DEVICE void setCurrCentroid();
142 
143  public:
146  bool const forward);
147 
148  // nullptr if !hasNext()
150 
151  DEVICE bool hasNext() const { return buf_->hasNext() || centroids_->hasNext(); }
152 
153  // Merge as many centroids as possible for count <= max_count.
154  DEVICE void merge(IndexType const max_count);
155 
156  // Assume curr_centroid_ is fully merged.
157  DEVICE void next();
158 
159  DEVICE IndexType prefixSum() const { return prefix_sum_; }
160 
161  DEVICE IndexType totalWeight() const { return total_weight_; }
162 };
163 
164 // Instantiated on host for reserving memory for Centroids.
165 template <typename RealType, typename IndexType>
167  // TODO CUDA memory containers
168  std::vector<RealType> sums_;
169  std::vector<IndexType> counts_;
170 
171  public:
172  explicit CentroidsMemory(size_t const size) : sums_(size), counts_(size) {}
173  size_t nbytes() const { return sums_.size() * (sizeof(RealType) + sizeof(IndexType)); }
174  size_t size() const { return sums_.size(); }
175  VectorView<RealType> sums() { return {sums_.data(), sums_.size(), sums_.size()}; }
177  return {counts_.data(), counts_.size(), counts_.size()};
178  }
179 };
180 
181 template <typename RealType, typename IndexType = size_t>
182 class TDigest {
185  bool forward_{true}; // alternate direction on each call to mergeCentroids().
186 
187  // simple_allocator_, buf_allocate_, centroids_allocate_ are used only by allocate().
189  IndexType const buf_allocate_{0};
190  IndexType const centroids_allocate_{0};
191 
192  DEVICE RealType max() const { return centroids_.max_; }
193  DEVICE RealType min() const { return centroids_.min_; }
194 
195  DEVICE IndexType maxCardinality(IndexType const sum, IndexType const total_weight);
196 
197  // Require: centroids are sorted by (mean, -count)
199 
200  DEVICE RealType firstCentroid(RealType const x);
201  DEVICE RealType interiorCentroid(RealType const x,
202  IndexType const idx1,
203  IndexType const prefix_sum);
204  DEVICE RealType lastCentroid(RealType const x, IndexType const N);
205  DEVICE RealType oneCentroid(RealType const x);
206 
207  DEVICE RealType slope(IndexType const idx1, IndexType const idx2);
208 
209  public:
211 
212  TDigest() = default;
213 
215  : centroids_(mem.sums().data(), mem.counts().data(), mem.size()) {
216  centroids_.clear();
217  }
218 
219  DEVICE TDigest(SimpleAllocator* simple_allocator,
220  IndexType buf_allocate,
221  IndexType centroids_allocate)
222  : simple_allocator_(simple_allocator)
223  , buf_allocate_(buf_allocate)
224  , centroids_allocate_(centroids_allocate) {}
225 
227 
228  // Store value to buf_, and merge when full.
229  DEVICE void add(RealType value);
230 
231  // Allocate memory if needed by simple_allocator_.
232  DEVICE void allocate();
233 
234  DEVICE void mergeBuffer();
235 
236  // Import from sorted memory range. Ok to change values during merge.
237  DEVICE void mergeSorted(RealType* sums, IndexType* counts, IndexType size);
238 
239  DEVICE void mergeTDigest(TDigest& t_digest) {
240  mergeBuffer();
241  t_digest.mergeBuffer();
242  mergeCentroids(t_digest.centroids_);
243  }
244 
245  // Uses buf_ as scratch space.
246  DEVICE RealType quantile(RealType const q) {
247  assert(centroids_.size() <= buf_.capacity());
248  return quantile(buf_.counts_.data(), q);
249  }
250 
251  // Uses buf as scratch space.
252  DEVICE RealType quantile(IndexType* buf, RealType const q);
253 
254  // Assumes mem is externally managed.
255  DEVICE void setBuffer(Memory& mem) {
257  mem.sums().data(), mem.counts().data(), mem.size());
258  buf_.clear();
259  }
260 
261  // Assumes mem is externally managed.
264  mem.sums().data(), mem.counts().data(), mem.size());
265  centroids_.clear();
266  }
267 
269  VectorView<IndexType> const counts) {
271  centroids_.clear();
272  }
273 
274  // Total number of data points in centroids_.
275  DEVICE IndexType totalWeight() const { return centroids_.totalWeight(); }
276 };
277 
278 // Class template member definitions
279 
280 // Class Centroids<>
281 
282 namespace {
283 
284 // Order by (mean, -count)
285 template <typename RealType, typename IndexType>
288  // Assume value1() is positive and use multiplication instead of division.
289  DEVICE bool operator()(Value const& a, Value const& b) const {
290  auto const lhs = a.value0() * b.value1();
291  auto const rhs = b.value0() * a.value1();
292  return lhs < rhs || (lhs == rhs && b.value1() < a.value1());
293  }
294 };
295 
296 } // namespace
297 
298 template <typename RealType, typename IndexType>
300  if (inc_ == -1 && curr_idx_ != 0) {
301  // Shift data to the left left by curr_idx_.
302  // Prefer to copy, but thrust::copy doesn't support overlapping ranges.
303  // Reverse instead, which gets sorted below.
304  // gpu_enabled::copy(sums_.begin() + curr_idx_, sums_.end(), sums_.begin());
305  // gpu_enabled::copy(counts_.begin() + curr_idx_, counts_.end(), counts_.begin());
306  gpu_enabled::reverse(sums_.begin(), sums_.end());
307  gpu_enabled::reverse(counts_.begin(), counts_.end());
308  }
309  // Shift VectorViews to the right by buff.curr_idx_.
310  IndexType const offset = inc_ == 1 ? 0 : buff.curr_idx_;
311  IndexType const buff_size =
312  inc_ == 1 ? buff.curr_idx_ + 1 : buff.size() - buff.curr_idx_;
313  VectorView<RealType> buff_sums(buff.sums_.begin() + offset, buff_size, buff_size);
314  VectorView<IndexType> buff_counts(buff.counts_.begin() + offset, buff_size, buff_size);
315  // Copy buff into sums_ and counts_.
316  IndexType const curr_size = inc_ == 1 ? curr_idx_ + 1 : size() - curr_idx_;
317  IndexType const total_size = curr_size + buff_sums.size();
318  assert(total_size <= sums_.capacity()); // TODO proof of inequality
319  sums_.resize(total_size);
320  gpu_enabled::copy(buff_sums.begin(), buff_sums.end(), sums_.begin() + curr_size);
321  assert(total_size <= counts_.capacity());
322  counts_.resize(total_size);
323  gpu_enabled::copy(buff_counts.begin(), buff_counts.end(), counts_.begin() + curr_size);
324 
325  // Sort sums_ and counts_ by (mean, -count).
326  double_sort::Iterator<RealType, IndexType> const begin(sums_.begin(), counts_.begin());
327  double_sort::Iterator<RealType, IndexType> const end(sums_.end(), counts_.end());
328  gpu_enabled::sort(begin, end, OrderByMeanAscCountDesc<RealType, IndexType>());
329 }
330 
331 // Return true if centroid was merged, false if not.
332 template <typename RealType, typename IndexType>
334  IndexType const max_count) {
335  if (counts_[curr_idx_] + centroid.nextCount() <= max_count) {
336  sums_[curr_idx_] += centroid.nextSum();
337  counts_[curr_idx_] += centroid.nextCount();
338  centroid.next_idx_ += centroid.inc_;
339  return true;
340  }
341  return false;
342 }
343 
344 template <typename RealType, typename IndexType>
346  curr_idx_ += inc_;
347  if (curr_idx_ != next_idx_) {
348  sums_[curr_idx_] = sums_[next_idx_];
349  counts_[curr_idx_] = counts_[next_idx_];
350  }
351  next_idx_ += inc_;
352 }
353 
354 template <typename RealType, typename IndexType>
356  if (forward) {
357  inc_ = 1;
358  curr_idx_ = ~IndexType(0);
359  } else {
360  inc_ = -1;
361  curr_idx_ = size();
362  }
363  static_assert(std::is_unsigned<IndexType>::value,
364  "IndexType must be an unsigned type.");
365  next_idx_ = curr_idx_ + inc_;
366 }
367 
368 #ifndef __CUDACC__
369 template <typename RealType, typename IndexType>
370 std::ostream& operator<<(std::ostream& out,
371  Centroids<RealType, IndexType> const& centroids) {
372  out << "Centroids<" << typeid(RealType).name() << ',' << typeid(IndexType).name()
373  << ">(size(" << centroids.size() << ") curr_idx_(" << centroids.curr_idx_
374  << ") next_idx_(" << centroids.next_idx_ << ") sums_(";
375  for (IndexType i = 0; i < centroids.sums_.size(); ++i) {
376  out << (i ? " " : "") << std::setprecision(20) << centroids.sums_[i];
377  }
378  out << ") counts_(";
379  for (IndexType i = 0; i < centroids.counts_.size(); ++i) {
380  out << (i ? " " : "") << centroids.counts_[i];
381  }
382  return out << "))";
383 }
384 #endif
385 
386 // Class CentroidsMerger<>
387 
388 template <typename RealType, typename IndexType>
392  bool const forward)
393  : buf_(buf)
394  , centroids_(centroids)
395  , total_weight_(centroids->totalWeight() + buf->totalWeight())
396  , forward_(forward) {
397  buf_->resetIndices(forward_);
398  centroids_->resetIndices(forward_);
399  mergeMinMax();
400  setCurrCentroid();
401 }
402 
403 template <typename RealType, typename IndexType>
406  if (buf_->hasNext()) {
407  if (centroids_->hasNext()) {
408  return (*buf_ < *centroids_) == forward_ ? buf_ : centroids_;
409  }
410  return buf_;
411  } else if (centroids_->hasNext()) {
412  return centroids_;
413  }
414  return nullptr;
415 }
416 
417 namespace {
418 
419 // Helper struct for mergeCentroids() for tracking skipped centroids.
420 // A consecutive sequence of incoming centroids with the same mean
421 // and exceed max_count are skipped before expensive reshuffling of memory.
422 template <typename RealType, typename IndexType>
423 class Skipped {
424  struct Data {
425  Centroids<RealType, IndexType>* centroid_{nullptr};
426  IndexType start_{0};
427  IndexType count_merged_{0};
428  IndexType count_skipped_{0};
429  } data_[2];
431 
432  DEVICE static void shiftCentroids(Data& data) {
433  if (data.count_merged_) {
434  shiftRange(data.centroid_->sums_.begin() + data.start_,
435  data.count_skipped_,
436  data.count_merged_,
437  data.centroid_->inc_);
438  shiftRange(data.centroid_->counts_.begin() + data.start_,
439  data.count_skipped_,
440  data.count_merged_,
441  data.centroid_->inc_);
442  data.start_ += data.centroid_->inc_ * data.count_merged_;
443  }
444  }
445  template <typename T>
446  DEVICE static void shiftRange(T* const begin,
447  IndexType skipped,
448  IndexType const merged,
449  int const inc) {
450 #ifdef __CUDACC__
451  T* src = begin + inc * (skipped - 1);
452  T* dst = src + inc * merged;
453  for (; skipped; --skipped, src -= inc, dst -= inc) {
454  *dst = *src;
455  }
456 #else
457  if (inc == 1) {
458  std::copy_backward(begin, begin + skipped, begin + skipped + merged);
459  } else {
460  std::copy(begin + 1 - skipped, begin + 1, begin + 1 - skipped - merged);
461  }
462 #endif
463  }
464 
465  public:
467  return data_[0].centroid_ != centroid;
468  }
470  return mean_.sum_ * next_centroid->nextCount() !=
471  next_centroid->nextSum() * mean_.count_;
472  }
474  IndexType const idx = index(next_centroid);
475  if (data_[idx].count_skipped_) {
476  ++data_[idx].count_merged_;
477  }
478  }
479  DEVICE operator bool() const { return data_[0].centroid_; }
480  // Shift skipped centroids over merged centroids, and rewind next_idx_.
482  shiftCentroids(data_[0]);
483  data_[0].centroid_->next_idx_ = data_[0].start_;
484  if (data_[1].centroid_) {
485  shiftCentroids(data_[1]);
486  data_[1].centroid_->next_idx_ = data_[1].start_;
487  }
488  }
490  mean_ = Centroid<RealType, IndexType>{next_centroid->nextSum(),
491  next_centroid->nextCount()};
492  data_[0] = {next_centroid, next_centroid->next_idx_, 0, 1};
493  next_centroid->next_idx_ += next_centroid->inc_;
494  }
496  IndexType const idx = index(next_centroid);
497  if (idx == 1 && data_[1].centroid_ == nullptr) {
498  data_[1] = {next_centroid, next_centroid->next_idx_, 0, 1};
499  } else {
500  if (data_[idx].count_merged_) {
501  shiftCentroids(data_[idx]);
502  data_[idx].count_merged_ = 0;
503  }
504  ++data_[idx].count_skipped_;
505  }
506  next_centroid->next_idx_ += next_centroid->inc_;
507  }
508 };
509 
510 } // namespace
511 
512 // Merge as many centroids as possible for count <= max_count.
513 template <typename RealType, typename IndexType>
514 DEVICE void CentroidsMerger<RealType, IndexType>::merge(IndexType const max_count) {
515  Skipped<RealType, IndexType> skipped;
516  while (auto* next_centroid = getNextCentroid()) {
517  if (skipped) {
518  if (skipped.isDifferentMean(next_centroid)) {
519  break;
520  } else if (curr_centroid_->mergeIfFits(*next_centroid, max_count)) {
521  skipped.merged(next_centroid);
522  } else {
523  skipped.skipSubsequent(next_centroid);
524  }
525  } else if (!curr_centroid_->mergeIfFits(*next_centroid, max_count)) {
526  skipped.skipFirst(next_centroid);
527  }
528  }
529  if (skipped) {
530  skipped.shiftCentroidsAndSetNext();
531  }
532 }
533 
534 // Track min/max without assuming the data points exist in any particular centroids.
535 // Otherwise to assume and track their existence in the min/max centroids introduces
536 // significant complexity. For example, the min centroid may not remain the min
537 // centroid if it merges into it the 3rd smallest centroid, skipping the 2nd due to
538 // the scaling function constraint.
539 // When the quantile() is calculated, the min/max data points are assumed to exist
540 // in the min/max centroids for the purposes of calculating the quantile within those
541 // centroids.
542 template <typename RealType, typename IndexType>
544  if (centroids_->max_ < buf_->max_) {
545  centroids_->max_ = buf_->max_;
546  }
547  if (buf_->min_ < centroids_->min_) {
548  centroids_->min_ = buf_->min_;
549  }
550 }
551 
552 // Assume curr_centroid_ is fully merged.
553 template <typename RealType, typename IndexType>
555  prefix_sum_ += curr_centroid_->currCount();
556  setCurrCentroid();
557 }
558 
559 template <typename RealType, typename IndexType>
561  if ((curr_centroid_ = getNextCentroid())) {
562  curr_centroid_->moveNextToCurrent();
563  }
564 }
565 
566 // class TDigest<>
567 
568 template <typename RealType, typename IndexType>
570  if (buf_.sums_.full()) {
571  mergeBuffer();
572  }
573  buf_.sums_.push_back(value);
574  buf_.counts_.push_back(1);
575 }
576 
577 // Assumes buf_ is allocated iff centroids_ is allocated.
578 template <typename RealType, typename IndexType>
580  if (buf_.capacity() == 0) {
581  auto* p0 = simple_allocator_->allocate(buf_allocate_ * sizeof(RealType));
582  auto* p1 = simple_allocator_->allocate(buf_allocate_ * sizeof(IndexType));
584  VectorView<RealType>(reinterpret_cast<RealType*>(p0), 0, buf_allocate_),
585  VectorView<IndexType>(reinterpret_cast<IndexType*>(p1), 0, buf_allocate_));
586  p0 = simple_allocator_->allocate(centroids_allocate_ * sizeof(RealType));
587  p1 = simple_allocator_->allocate(centroids_allocate_ * sizeof(IndexType));
588  centroids_ = Centroids<RealType, IndexType>(
589  VectorView<RealType>(reinterpret_cast<RealType*>(p0), 0, centroids_allocate_),
590  VectorView<IndexType>(reinterpret_cast<IndexType*>(p1), 0, centroids_allocate_));
591  }
592 }
593 
594 template <typename RealType, typename IndexType>
595 DEVICE IndexType
597  IndexType const total_weight) {
598  IndexType const max_bins = centroids_.capacity();
599  return max_bins < total_weight ? 2 * total_weight / max_bins : 0;
600 }
601 
602 // Assumes buf_ consists only of singletons.
603 template <typename RealType, typename IndexType>
605  if (buf_.size()) {
606  gpu_enabled::sort(buf_.sums_.begin(), buf_.sums_.end());
607  buf_.min_ = buf_.sums_.front();
608  buf_.max_ = buf_.sums_.back();
609  mergeCentroids(buf_);
610  }
611 }
612 
613 template <typename RealType, typename IndexType>
615  IndexType* counts,
616  IndexType size) {
617  if (size) {
618  if (buf_.capacity() == 0) {
619  buf_ = Centroids<RealType, IndexType>(sums, counts, size); // Set capacity and size
620  } else {
621  buf_.sums_.set(sums, size); // Does not change capacity
622  buf_.counts_.set(counts, size);
623  }
624  gpu_enabled::fill(buf_.counts_.begin(), buf_.counts_.end(), IndexType(1));
625  buf_.min_ = buf_.sums_.front();
626  buf_.max_ = buf_.sums_.back();
627  mergeCentroids(buf_);
628  }
629 }
630 
631 // Require:
632 // * buf centroids are not empty and sorted by (mean, -count)
633 // * buf.min_ and buf.max_ are correctly set.
634 // During filling stage, buf=buf_.
635 // During reduction, buf_=centroids_[i]
636 template <typename RealType, typename IndexType>
639  // Loop over sorted sequence of buf and centroids_.
640  // Some latter centroids may be merged into the current centroid, so the number
641  // of iterations is only at most equal to the number of initial centroids.
643  for (CM cm(&buf, &centroids_, forward_); cm.hasNext(); cm.next()) {
644  // cm.prefixSum() == 0 on first iteration.
645  // Max cardinality for current centroid to be fully merged based on scaling function.
646  IndexType const max_cardinality = maxCardinality(cm.prefixSum(), cm.totalWeight());
647  cm.merge(max_cardinality);
648  }
649  // Combine sorted centroids buf[0..curr_idx_] + centroids_[0..curr_idx_] if forward
650  centroids_.appendAndSortCurrent(buf);
651  buf.clear();
652  forward_ ^= true; // alternate merge direction on each call
653 }
654 
655 namespace {
656 template <typename CountsIterator>
657 DEVICE bool isSingleton(CountsIterator itr) {
658  return *itr == 1;
659 }
660 } // namespace
661 
662 // Assumes x < centroids_.counts_.front().
663 template <typename RealType, typename IndexType>
665  if (x < 1) {
666  return min();
667  } else if (centroids_.size() == 1) {
668  return oneCentroid(x);
669  } else if (centroids_.counts_.front() == 2) {
670  RealType const sum = centroids_.sums_.front();
671  return x == 1 ? 0.5 * sum : sum - min();
672  } else {
673  RealType const count = centroids_.counts_.front();
674  RealType const dx = x - RealType(0.5) * (1 + count);
675  RealType const mean = (centroids_.sums_.front() - min()) / (count - 1);
676  return mean + slope(0, 0 < dx) * dx;
677  }
678 }
679 
680 // x is between first and last centroids.
681 template <typename RealType, typename IndexType>
682 DEVICE RealType
684  IndexType const idx1,
685  IndexType const prefix_sum) {
686  if (isSingleton(centroids_.counts_.begin() + idx1)) {
687  RealType const sum1 = centroids_.sums_[idx1];
688  if (x == prefix_sum - centroids_.counts_[idx1]) {
689  if (isSingleton(centroids_.counts_.begin() + idx1 - 1)) {
690  return 0.5 * (centroids_.sums_[idx1 - 1] + sum1);
691  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
692  return 0.5 * (centroids_.sums_[idx1 - 1] - min() + sum1);
693  }
694  }
695  return sum1;
696  } else {
697  RealType const dx = x + RealType(0.5) * centroids_.counts_[idx1] - prefix_sum;
698  IndexType const idx2 = idx1 + 2 * (0 < dx) - 1;
699  return centroids_.mean(idx1) + slope(idx1, idx2) * dx;
700  }
701 }
702 
703 // Assumes N - centroids_.counts_.back() <= x < N, and there is more than 1 centroid.
704 template <typename RealType, typename IndexType>
706  IndexType const N) {
707  if (N - 1 < x) {
708  return max();
709  }
710  IndexType const idx1 = centroids_.size() - 1;
711  RealType const sum1 = centroids_.sums_[idx1];
712  IndexType const count1 = centroids_.counts_[idx1];
713  if (count1 == 1) { // => x == N - 1
714  if (isSingleton(centroids_.counts_.begin() + (idx1 - 1))) {
715  return 0.5 * (centroids_.sums_[idx1 - 1] + sum1);
716  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
717  return 0.5 * (centroids_.sums_[idx1 - 1] - min() + sum1);
718  } else {
719  return sum1;
720  }
721  } else if (count1 == 2) { // => 3 <= N
722  if (x == N - 1) {
723  return 0.5 * sum1;
724  } else if (x == N - 2) {
725  RealType const sum2 = centroids_.sums_[idx1 - 1];
726  if (isSingleton(centroids_.counts_.begin() + (idx1 - 1))) {
727  return 0.5 * (sum2 + sum1 - max());
728  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
729  return 0.5 * (sum2 - min() + sum1 - max());
730  }
731  }
732  return sum1 - max();
733  } else { // => 3 <= count1
734  RealType const dx = x + RealType(0.5) * (count1 + 1) - N;
735  RealType const mean = (sum1 - max()) / (count1 - 1);
736  return mean + slope(idx1, idx1 - (dx < 0)) * dx;
737  }
738 }
739 
740 // Assumes there is only 1 centroid, and 1 <= x < centroids_.counts_.front().
741 template <typename RealType, typename IndexType>
743  IndexType const N = centroids_.counts_.front();
744  if (N - 1 < x) { // includes case N == 1
745  return max();
746  } else if (N == 2) { // x == 1
747  return 0.5 * centroids_.sums_.front();
748  } else if (N == 3) { // 1 <= x <= 2
749  if (x == 2) {
750  return 0.5 * (centroids_.sums_.front() - min());
751  } else {
752  RealType const s = centroids_.sums_.front() - max();
753  return x == 1 ? 0.5 * s : s - min();
754  }
755  } else { // 3 < N
756  RealType const dx = x - RealType(0.5) * N;
757  RealType const mean = (centroids_.sums_.front() - (min() + max())) / (N - 2);
758  RealType const slope = 2 * (0 < dx ? max() - mean : mean - min()) / (N - 2);
759  return mean + slope * dx;
760  }
761 }
762 
763 // No need to calculate entire partial_sum unless multiple calls to quantile() are made.
764 template <typename RealType, typename IndexType>
765 DEVICE RealType TDigest<RealType, IndexType>::quantile(IndexType* buf, RealType const q) {
766  if (centroids_.size()) {
767  VectorView<IndexType> partial_sum(buf, centroids_.size(), centroids_.size());
769  centroids_.counts_.begin(), centroids_.counts_.end(), partial_sum.begin());
770  IndexType const N = partial_sum.back();
771  RealType const x = q * N;
772  auto const it1 = gpu_enabled::upper_bound(partial_sum.begin(), partial_sum.end(), x);
773  if (it1 == partial_sum.begin()) {
774  return firstCentroid(x);
775  } else if (it1 == partial_sum.end()) { // <==> 1 <= q
776  return max();
777  } else if (it1 + 1 == partial_sum.end()) {
778  return lastCentroid(x, N);
779  } else {
780  return interiorCentroid(x, it1 - partial_sum.begin(), *it1);
781  }
782  } else {
783  return centroids_.nan;
784  }
785 }
786 
787 // Requirement: 1 < M and 0 <= idx1, idx2 < M where M = Number of centroids.
788 // Return slope of line segment connecting idx1 and idx2.
789 // If equal then assume it is the section contained within an extrema.
790 // Centroid for idx1 is not a singleton, but idx2 may be.
791 template <typename RealType, typename IndexType>
792 DEVICE RealType TDigest<RealType, IndexType>::slope(IndexType idx1, IndexType idx2) {
793  IndexType const M = centroids_.size();
794  if (idx1 == idx2) { // Line segment is contained in either the first or last centroid.
795  RealType const n = static_cast<RealType>(centroids_.counts_[idx1]);
796  RealType const s = centroids_.sums_[idx1];
797  return idx1 == 0 ? 2 * (s - n * min()) / ((n - 1) * (n - 1))
798  : 2 * (n * max() - s) / ((n - 1) * (n - 1));
799  } else {
800  bool const min1 = idx1 == 0; // idx1 is the min centroid
801  bool const max1 = idx1 == M - 1; // idx1 is the max centroid
802  bool const min2 = idx2 == 0; // idx2 is the min centroid
803  bool const max2 = idx2 == M - 1; // idx2 is the max centroid
804  RealType const n1 = static_cast<RealType>(centroids_.counts_[idx1] - min1 - max1);
805  RealType const s1 = centroids_.sums_[idx1] - (min1 ? min() : max1 ? max() : 0);
806  RealType const s2 = centroids_.sums_[idx2] - (min2 ? min() : max2 ? max() : 0);
807  if (isSingleton(centroids_.counts_.begin() + idx2)) {
808  return (idx1 < idx2 ? 2 : -2) * (n1 * s2 - s1) / (n1 * n1);
809  } else {
810  RealType const n2 = static_cast<RealType>(centroids_.counts_[idx2] - min2 - max2);
811  return (idx1 < idx2 ? 2 : -2) * (n1 * s2 - n2 * s1) / (n1 * n2 * (n1 + n2));
812  }
813  }
814 }
815 
816 } // namespace detail
817 
819 
820 } // namespace quantile
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
static constexpr RealType infinity
Definition: quantile.h:60
DEVICE void push_back(RealType const value, RealType const count)
Definition: quantile.h:112
DEVICE void push_back(T const &value)
Definition: VectorView.h:74
std::vector< RealType > sums_
Definition: quantile.h:168
DEVICE size_type capacity() const
Definition: VectorView.h:61
Centroids< RealType, IndexType > * buf_
Definition: quantile.h:133
DEVICE void skipSubsequent(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:495
std::ostream & operator<<(std::ostream &out, Centroids< RealType, IndexType > const &centroids)
Definition: quantile.h:370
DEVICE RealType quantile(RealType const q)
Definition: quantile.h:246
CentroidsMemory(size_t const size)
Definition: quantile.h:172
DEVICE RealType mean(IndexType const i) const
Definition: quantile.h:92
DEVICE bool index(Centroids< RealType, IndexType > *centroid) const
Definition: quantile.h:466
DEVICE void resetIndices(bool const forward)
Definition: quantile.h:355
DEVICE Centroids< RealType, IndexType > & centroids()
Definition: quantile.h:226
DEVICE void moveNextToCurrent()
Definition: quantile.h:345
DEVICE void add(RealType value)
Definition: quantile.h:569
string name
Definition: setup.in.py:62
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
DEVICE void merged(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:473
DEVICE bool mergeIfFits(Centroids &centroid, IndexType const max_count)
Definition: quantile.h:333
DEVICE bool hasNext() const
Definition: quantile.h:90
DEVICE TDigest(Memory &mem)
Definition: quantile.h:214
DEVICE void mergeBuffer()
Definition: quantile.h:604
DEVICE IndexType totalWeight() const
Definition: quantile.h:275
VectorView< RealType > sums()
Definition: quantile.h:175
DEVICE T * data()
Definition: VectorView.h:65
VectorView< RealType > sums_
Definition: quantile.h:58
VectorView< IndexType > counts_
Definition: quantile.h:59
DEVICE IndexType maxCardinality(IndexType const sum, IndexType const total_weight)
Definition: quantile.h:596
std::vector< IndexType > counts_
Definition: quantile.h:169
DEVICE void clear()
Definition: VectorView.h:64
DEVICE RealType lastCentroid(RealType const x, IndexType const N)
Definition: quantile.h:705
#define DEVICE
DEVICE RealType currMean() const
Definition: quantile.h:86
DEVICE void setCentroids(Memory &mem)
Definition: quantile.h:262
DEVICE RealType oneCentroid(RealType const x)
Definition: quantile.h:742
DEVICE size_type size() const
Definition: VectorView.h:84
Centroids< RealType, IndexType > * centroids_
Definition: quantile.h:134
DEVICE RealType firstCentroid(RealType const x)
Definition: quantile.h:664
DEVICE RealType max() const
Definition: quantile.h:192
DEVICE void fill(ARGS &&...args)
Definition: gpu_enabled.h:60
DEVICE void set(T *data, size_type const size)
Definition: VectorView.h:80
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
int count
DEVICE Centroids(VectorView< RealType > sums, VectorView< IndexType > counts)
Definition: quantile.h:70
DEVICE T * begin() const
Definition: VectorView.h:60
DEVICE CentroidsMerger(Centroids< RealType, IndexType > *buf, Centroids< RealType, IndexType > *centroids, bool const forward)
Definition: quantile.h:389
DEVICE void mergeTDigest(TDigest &t_digest)
Definition: quantile.h:239
DEVICE IndexType nextCount() const
Definition: quantile.h:99
DEVICE void partial_sum(ARGS &&...args)
Definition: gpu_enabled.h:87
DEVICE void setCentroids(VectorView< RealType > const sums, VectorView< IndexType > const counts)
Definition: quantile.h:268
DEVICE RealType interiorCentroid(RealType const x, IndexType const idx1, IndexType const prefix_sum)
Definition: quantile.h:683
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
Centroids< RealType, IndexType > buf_
Definition: quantile.h:183
IndexType const buf_allocate_
Definition: quantile.h:189
DEVICE RealType nextSum() const
Definition: quantile.h:101
DEVICE void skipFirst(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:489
DEVICE void allocate()
Definition: quantile.h:579
static DEVICE void shiftRange(T *const begin, IndexType skipped, IndexType const merged, int const inc)
Definition: quantile.h:446
DEVICE void setBuffer(Memory &mem)
Definition: quantile.h:255
DEVICE bool operator()(Value const &a, Value const &b) const
Definition: quantile.h:289
static constexpr RealType nan
Definition: quantile.h:61
DEVICE Centroids< RealType, IndexType > * getNextCentroid() const
Definition: quantile.h:405
DEVICE IndexType prefixSum() const
Definition: quantile.h:159
DEVICE Centroids(RealType *sums, IndexType *counts, IndexType const size)
Definition: quantile.h:67
SimpleAllocator *const simple_allocator_
Definition: quantile.h:188
DEVICE void merge(IndexType const max_count)
Definition: quantile.h:514
DEVICE bool operator<(Centroids const &b) const
Definition: quantile.h:105
DEVICE bool hasNext() const
Definition: quantile.h:151
DEVICE TDigest(SimpleAllocator *simple_allocator, IndexType buf_allocate, IndexType centroids_allocate)
Definition: quantile.h:219
IndexType const centroids_allocate_
Definition: quantile.h:190
Centroids< RealType, IndexType > * curr_centroid_
Definition: quantile.h:135
DEVICE size_t size() const
Definition: quantile.h:119
DEVICE IndexType currCount() const
Definition: quantile.h:84
DEVICE IndexType totalWeight() const
Definition: quantile.h:121
DEVICE bool isDifferentMean(Centroids< RealType, IndexType > *next_centroid) const
Definition: quantile.h:469
DEVICE T0 value0() const
Definition: DoubleSort.h:87
DEVICE RealType slope(IndexType const idx1, IndexType const idx2)
Definition: quantile.h:792
DEVICE IndexType capacity() const
Definition: quantile.h:75
DEVICE IndexType totalWeight() const
Definition: quantile.h:161
DEVICE T1 value1() const
Definition: DoubleSort.h:88
DEVICE void mergeCentroids(Centroids< RealType, IndexType > &)
Definition: quantile.h:637
DEVICE void reverse(ARGS &&...args)
Definition: gpu_enabled.h:96
DEVICE RealType min() const
Definition: quantile.h:193
DEVICE bool isSingleton(CountsIterator itr)
Definition: quantile.h:657
DEVICE void appendAndSortCurrent(Centroids &buff)
Definition: quantile.h:299
DEVICE bool hasCurr() const
Definition: quantile.h:88
DEVICE void mergeSorted(RealType *sums, IndexType *counts, IndexType size)
Definition: quantile.h:614
VectorView< IndexType > counts()
Definition: quantile.h:176
Centroids< RealType, IndexType > centroids_
Definition: quantile.h:184
DEVICE T * end() const
Definition: VectorView.h:68