OmniSciDB  d2f719934e
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
quantile.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 OmniSci, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file quantile.h
19  * @author Matt Pulver <matt.pulver@omnisci.com>
20  * @description Calculate approximate median and general quantiles, based on
21  * "Computing Extremely Accurate Quantiles Using t-Digests" by T. Dunning et al.
22  * https://arxiv.org/abs/1902.04023
23  *
24  */
25 
26 #pragma once
27 
28 #include "DoubleSort.h"
29 #include "SimpleAllocator.h"
30 #include "VectorView.h"
31 #include "gpu_enabled.h"
32 
33 #ifndef __CUDACC__
34 #include <iomanip>
35 #include <ostream>
36 #endif
37 
38 #include <cmath>
39 #include <limits>
40 #include <memory>
41 #include <numeric>
42 #include <optional>
43 #include <type_traits>
44 
45 namespace quantile {
46 
47 namespace detail {
48 
49 template <typename RealType, typename IndexType>
50 struct Centroid {
51  RealType sum_;
52  IndexType count_;
53 };
54 
55 template <typename RealType, typename IndexType>
56 struct Centroids {
57  IndexType curr_idx_; // used during mergeCentroids algorithm
58  IndexType next_idx_; // used during mergeCentroids algorithm
59  int inc_; // 1 or -1 : forward or reverse iteration
62  static constexpr RealType infinity = std::numeric_limits<RealType>::infinity();
63  static constexpr RealType nan = std::numeric_limits<RealType>::quiet_NaN();
64  RealType max_{-infinity};
65  RealType min_{infinity};
66 
67  Centroids() = default;
68 
69  DEVICE Centroids(RealType* sums, IndexType* counts, IndexType const size)
70  : sums_(sums, size, size), counts_(counts, size, size) {}
71 
73  : sums_(sums), counts_(counts) {}
74 
76 
77  DEVICE IndexType capacity() const { return sums_.capacity(); }
78 
79  DEVICE void clear() {
80  sums_.clear();
81  counts_.clear();
82  max_ = -infinity;
83  min_ = infinity;
84  }
85 
86  DEVICE IndexType currCount() const { return counts_[curr_idx_]; }
87 
88  DEVICE RealType currMean() const { return mean(curr_idx_); }
89 
90  DEVICE bool hasCurr() const { return curr_idx_ < size(); }
91 
92  DEVICE bool hasNext() const { return next_idx_ < size(); }
93 
94  DEVICE RealType mean(IndexType const i) const { return sums_[i] / counts_[i]; }
95 
96  // Return true if centroid was merged, false if not.
97  DEVICE bool mergeIfFits(Centroids& centroid, IndexType const max_count);
98 
100 
101  DEVICE IndexType nextCount() const { return counts_[next_idx_]; }
102 
103  DEVICE RealType nextSum() const { return sums_[next_idx_]; }
104 
105  // Assumes this->hasNext() and b.hasNext().
106  // Order by (nextMean(), -nextCount()).
107  DEVICE bool operator<(Centroids const& b) const {
108  // lhs < rhs is same as nextMean() < b.nextMean() without division
109  RealType const lhs = nextSum() * b.nextCount();
110  RealType const rhs = b.nextSum() * nextCount();
111  return lhs < rhs || (lhs == rhs && b.nextCount() < nextCount());
112  }
113 
114  DEVICE void push_back(RealType const value, RealType const count) {
115  sums_.push_back(value);
116  counts_.push_back(count);
117  }
118 
119  DEVICE void resetIndices(bool const forward);
120 
121  DEVICE size_t size() const { return sums_.size(); }
122 
123  DEVICE IndexType totalWeight() const {
124  return gpu_enabled::accumulate(counts_.begin(), counts_.end(), IndexType(0));
125  }
126 
127 #ifndef __CUDACC__
128  template <typename RealType2, typename IndexType2>
129  friend std::ostream& operator<<(std::ostream&, Centroids<RealType2, IndexType2> const&);
130 #endif
131 };
132 
133 template <typename RealType, typename IndexType>
138  IndexType prefix_sum_{0}; // Prefix-sum of centroid counts.
139  IndexType const total_weight_;
140  bool const forward_;
141 
142  DEVICE void mergeMinMax();
143  DEVICE void setCurrCentroid();
144 
145  public:
148  bool const forward);
149 
150  // nullptr if !hasNext()
152 
153  DEVICE bool hasNext() const { return buf_->hasNext() || centroids_->hasNext(); }
154 
155  // Merge as many centroids as possible for count <= max_count.
156  DEVICE void merge(IndexType const max_count);
157 
158  // Assume curr_centroid_ is fully merged.
159  DEVICE void next();
160 
161  DEVICE IndexType prefixSum() const { return prefix_sum_; }
162 
163  DEVICE IndexType totalWeight() const { return total_weight_; }
164 };
165 
166 // Instantiated on host for reserving memory for Centroids.
167 template <typename RealType, typename IndexType>
169  // TODO CUDA memory containers
170  std::vector<RealType> sums_;
171  std::vector<IndexType> counts_;
172 
173  public:
174  explicit CentroidsMemory(size_t const size) : sums_(size), counts_(size) {}
175  size_t nbytes() const { return sums_.size() * (sizeof(RealType) + sizeof(IndexType)); }
176  size_t size() const { return sums_.size(); }
177  VectorView<RealType> sums() { return {sums_.data(), sums_.size(), sums_.size()}; }
179  return {counts_.data(), counts_.size(), counts_.size()};
180  }
181 };
182 
183 template <typename RealType, typename IndexType = size_t>
184 class TDigest {
187  bool forward_{true}; // alternate direction on each call to mergeCentroids().
188 
189  // simple_allocator_, buf_allocate_, centroids_allocate_ are used only by allocate().
190  std::optional<RealType> const q_{std::nullopt}; // Optional preset quantile parameter.
191  bool const use_linear_scaling_function_{false};
193  IndexType const buf_allocate_{0};
194  IndexType const centroids_allocate_{0};
195 
196  DEVICE RealType max() const { return centroids_.max_; }
197  DEVICE RealType min() const { return centroids_.min_; }
198 
199  DEVICE IndexType maxCardinality(IndexType const sum,
200  IndexType const total_weight,
201  RealType const c);
202 
203  // Require: centroids are sorted by (mean, -count)
205 
206  DEVICE RealType firstCentroid(RealType const x);
207  DEVICE RealType interiorCentroid(RealType const x,
208  IndexType const idx1,
209  IndexType const prefix_sum);
210  DEVICE RealType lastCentroid(RealType const x, IndexType const N);
211  DEVICE RealType oneCentroid(RealType const x);
212 
213  DEVICE RealType slope(IndexType const idx1, IndexType const idx2);
214 
215  public:
217 
218  TDigest() = default;
219 
221  : centroids_(mem.sums().data(), mem.counts().data(), mem.size()) {
222  centroids_.clear();
223  }
224 
225  DEVICE TDigest(RealType q,
226  SimpleAllocator* simple_allocator,
227  IndexType buf_allocate,
228  IndexType centroids_allocate)
229  : q_(q)
230  , use_linear_scaling_function_(q_ && 0.1 <= *q_ && *q_ <= 0.9)
231  , simple_allocator_(simple_allocator)
232  , buf_allocate_(buf_allocate)
233  , centroids_allocate_(centroids_allocate) {}
234 
236 
237  // Store value to buf_, and merge when full.
238  DEVICE void add(RealType value);
239 
240  // Allocate memory if needed by simple_allocator_.
241  DEVICE void allocate();
242 
243  DEVICE void mergeBuffer();
244 
245  // Import from sorted memory range. Ok to change values during merge.
246  DEVICE void mergeSorted(RealType* sums, IndexType* counts, IndexType size);
247 
248  DEVICE void mergeTDigest(TDigest& t_digest) {
249  mergeBuffer();
250  t_digest.mergeBuffer();
251  mergeCentroids(t_digest.centroids_);
252  }
253 
254  // Uses buf as scratch space.
255  DEVICE RealType quantile(IndexType* buf, RealType const q);
256 
257  // Uses buf_ as scratch space.
258  DEVICE RealType quantile(RealType const q) {
259  assert(centroids_.size() <= buf_.capacity());
260  return quantile(buf_.counts_.data(), q);
261  }
262 
263  DEVICE RealType quantile() { return q_ ? quantile(*q_) : centroids_.nan; }
264 
265  // Assumes mem is externally managed.
266  DEVICE void setBuffer(Memory& mem) {
268  mem.sums().data(), mem.counts().data(), mem.size());
269  buf_.clear();
270  }
271 
272  // Assumes mem is externally managed.
275  mem.sums().data(), mem.counts().data(), mem.size());
276  centroids_.clear();
277  }
278 
280  VectorView<IndexType> const counts) {
282  centroids_.clear();
283  }
284 
285  // Total number of data points in centroids_.
286  DEVICE IndexType totalWeight() const { return centroids_.totalWeight(); }
287 };
288 
289 // Class template member definitions
290 
291 // Class Centroids<>
292 
293 namespace {
294 
295 // Order by (mean, -count)
296 template <typename RealType, typename IndexType>
299  // Assume value1() is positive and use multiplication instead of division.
300  DEVICE bool operator()(Value const& a, Value const& b) const {
301  auto const lhs = a.value0() * b.value1();
302  auto const rhs = b.value0() * a.value1();
303  return lhs < rhs || (lhs == rhs && b.value1() < a.value1());
304  }
305 };
306 
307 } // namespace
308 
309 template <typename RealType, typename IndexType>
311  if (inc_ == -1 && curr_idx_ != 0) {
312  // Shift data to the left left by curr_idx_.
313  // Prefer to copy, but thrust::copy doesn't support overlapping ranges.
314  // Reverse instead, which gets sorted below.
315  // gpu_enabled::copy(sums_.begin() + curr_idx_, sums_.end(), sums_.begin());
316  // gpu_enabled::copy(counts_.begin() + curr_idx_, counts_.end(), counts_.begin());
317  gpu_enabled::reverse(sums_.begin(), sums_.end());
318  gpu_enabled::reverse(counts_.begin(), counts_.end());
319  }
320  // Shift VectorViews to the right by buff.curr_idx_.
321  IndexType const offset = inc_ == 1 ? 0 : buff.curr_idx_;
322  IndexType const buff_size =
323  inc_ == 1 ? buff.curr_idx_ + 1 : buff.size() - buff.curr_idx_;
324  VectorView<RealType> buff_sums(buff.sums_.begin() + offset, buff_size, buff_size);
325  VectorView<IndexType> buff_counts(buff.counts_.begin() + offset, buff_size, buff_size);
326  // Copy buff into sums_ and counts_.
327  IndexType const curr_size = inc_ == 1 ? curr_idx_ + 1 : size() - curr_idx_;
328  IndexType const total_size = curr_size + buff_sums.size();
329  assert(total_size <= sums_.capacity()); // TODO proof of inequality
330  sums_.resize(total_size);
331  gpu_enabled::copy(buff_sums.begin(), buff_sums.end(), sums_.begin() + curr_size);
332  assert(total_size <= counts_.capacity());
333  counts_.resize(total_size);
334  gpu_enabled::copy(buff_counts.begin(), buff_counts.end(), counts_.begin() + curr_size);
335 
336  // Sort sums_ and counts_ by (mean, -count).
337  double_sort::Iterator<RealType, IndexType> const begin(sums_.begin(), counts_.begin());
338  double_sort::Iterator<RealType, IndexType> const end(sums_.end(), counts_.end());
339  gpu_enabled::sort(begin, end, OrderByMeanAscCountDesc<RealType, IndexType>());
340 }
341 
342 // Return true if centroid was merged, false if not.
343 template <typename RealType, typename IndexType>
345  IndexType const max_count) {
346  if (counts_[curr_idx_] + centroid.nextCount() <= max_count) {
347  sums_[curr_idx_] += centroid.nextSum();
348  counts_[curr_idx_] += centroid.nextCount();
349  centroid.next_idx_ += centroid.inc_;
350  return true;
351  }
352  return false;
353 }
354 
355 template <typename RealType, typename IndexType>
357  curr_idx_ += inc_;
358  if (curr_idx_ != next_idx_) {
359  sums_[curr_idx_] = sums_[next_idx_];
360  counts_[curr_idx_] = counts_[next_idx_];
361  }
362  next_idx_ += inc_;
363 }
364 
365 template <typename RealType, typename IndexType>
367  if (forward) {
368  inc_ = 1;
369  curr_idx_ = ~IndexType(0);
370  } else {
371  inc_ = -1;
372  curr_idx_ = size();
373  }
374  static_assert(std::is_unsigned<IndexType>::value,
375  "IndexType must be an unsigned type.");
376  next_idx_ = curr_idx_ + inc_;
377 }
378 
379 #ifndef __CUDACC__
380 template <typename RealType, typename IndexType>
381 std::ostream& operator<<(std::ostream& out,
382  Centroids<RealType, IndexType> const& centroids) {
383  out << "Centroids<" << typeid(RealType).name() << ',' << typeid(IndexType).name()
384  << ">(size(" << centroids.size() << ") curr_idx_(" << centroids.curr_idx_
385  << ") next_idx_(" << centroids.next_idx_ << ") sums_(";
386  for (IndexType i = 0; i < centroids.sums_.size(); ++i) {
387  out << (i ? " " : "") << std::setprecision(20) << centroids.sums_[i];
388  }
389  out << ") counts_(";
390  for (IndexType i = 0; i < centroids.counts_.size(); ++i) {
391  out << (i ? " " : "") << centroids.counts_[i];
392  }
393  return out << "))";
394 }
395 #endif
396 
397 // Class CentroidsMerger<>
398 
399 template <typename RealType, typename IndexType>
403  bool const forward)
404  : buf_(buf)
405  , centroids_(centroids)
406  , total_weight_(centroids->totalWeight() + buf->totalWeight())
407  , forward_(forward) {
408  buf_->resetIndices(forward_);
409  centroids_->resetIndices(forward_);
410  mergeMinMax();
411  setCurrCentroid();
412 }
413 
414 template <typename RealType, typename IndexType>
417  if (buf_->hasNext()) {
418  if (centroids_->hasNext()) {
419  return (*buf_ < *centroids_) == forward_ ? buf_ : centroids_;
420  }
421  return buf_;
422  } else if (centroids_->hasNext()) {
423  return centroids_;
424  }
425  return nullptr;
426 }
427 
428 namespace {
429 
430 // Helper struct for mergeCentroids() for tracking skipped centroids.
431 // A consecutive sequence of incoming centroids with the same mean
432 // and exceed max_count are skipped before expensive reshuffling of memory.
433 template <typename RealType, typename IndexType>
434 class Skipped {
435  struct Data {
436  Centroids<RealType, IndexType>* centroid_{nullptr};
437  IndexType start_{0};
438  IndexType count_merged_{0};
439  IndexType count_skipped_{0};
440  } data_[2];
442 
443  DEVICE static void shiftCentroids(Data& data) {
444  if (data.count_merged_) {
445  shiftRange(data.centroid_->sums_.begin() + data.start_,
446  data.count_skipped_,
447  data.count_merged_,
448  data.centroid_->inc_);
449  shiftRange(data.centroid_->counts_.begin() + data.start_,
450  data.count_skipped_,
451  data.count_merged_,
452  data.centroid_->inc_);
453  data.start_ += data.centroid_->inc_ * data.count_merged_;
454  }
455  }
456  template <typename T>
457  DEVICE static void shiftRange(T* const begin,
458  IndexType skipped,
459  IndexType const merged,
460  int const inc) {
461 #ifdef __CUDACC__
462  T* src = begin + inc * (skipped - 1);
463  T* dst = src + inc * merged;
464  for (; skipped; --skipped, src -= inc, dst -= inc) {
465  *dst = *src;
466  }
467 #else
468  if (inc == 1) {
469  std::copy_backward(begin, begin + skipped, begin + skipped + merged);
470  } else {
471  std::copy(begin + 1 - skipped, begin + 1, begin + 1 - skipped - merged);
472  }
473 #endif
474  }
475 
476  public:
478  return data_[0].centroid_ != centroid;
479  }
481  return mean_.sum_ * next_centroid->nextCount() !=
482  next_centroid->nextSum() * mean_.count_;
483  }
485  IndexType const idx = index(next_centroid);
486  if (data_[idx].count_skipped_) {
487  ++data_[idx].count_merged_;
488  }
489  }
490  DEVICE operator bool() const { return data_[0].centroid_; }
491  // Shift skipped centroids over merged centroids, and rewind next_idx_.
493  shiftCentroids(data_[0]);
494  data_[0].centroid_->next_idx_ = data_[0].start_;
495  if (data_[1].centroid_) {
496  shiftCentroids(data_[1]);
497  data_[1].centroid_->next_idx_ = data_[1].start_;
498  }
499  }
501  mean_ = Centroid<RealType, IndexType>{next_centroid->nextSum(),
502  next_centroid->nextCount()};
503  data_[0] = {next_centroid, next_centroid->next_idx_, 0, 1};
504  next_centroid->next_idx_ += next_centroid->inc_;
505  }
507  IndexType const idx = index(next_centroid);
508  if (idx == 1 && data_[1].centroid_ == nullptr) {
509  data_[1] = {next_centroid, next_centroid->next_idx_, 0, 1};
510  } else {
511  if (data_[idx].count_merged_) {
512  shiftCentroids(data_[idx]);
513  data_[idx].count_merged_ = 0;
514  }
515  ++data_[idx].count_skipped_;
516  }
517  next_centroid->next_idx_ += next_centroid->inc_;
518  }
519 };
520 
521 } // namespace
522 
523 // Merge as many centroids as possible for count <= max_count.
524 template <typename RealType, typename IndexType>
525 DEVICE void CentroidsMerger<RealType, IndexType>::merge(IndexType const max_count) {
526  Skipped<RealType, IndexType> skipped;
527  while (auto* next_centroid = getNextCentroid()) {
528  if (skipped) {
529  if (skipped.isDifferentMean(next_centroid)) {
530  break;
531  } else if (curr_centroid_->mergeIfFits(*next_centroid, max_count)) {
532  skipped.merged(next_centroid);
533  } else {
534  skipped.skipSubsequent(next_centroid);
535  }
536  } else if (!curr_centroid_->mergeIfFits(*next_centroid, max_count)) {
537  skipped.skipFirst(next_centroid);
538  }
539  }
540  if (skipped) {
541  skipped.shiftCentroidsAndSetNext();
542  }
543 }
544 
545 // Track min/max without assuming the data points exist in any particular centroids.
546 // Otherwise to assume and track their existence in the min/max centroids introduces
547 // significant complexity. For example, the min centroid may not remain the min
548 // centroid if it merges into it the 3rd smallest centroid, skipping the 2nd due to
549 // the scaling function constraint.
550 // When the quantile() is calculated, the min/max data points are assumed to exist
551 // in the min/max centroids for the purposes of calculating the quantile within those
552 // centroids.
553 template <typename RealType, typename IndexType>
555  if (centroids_->max_ < buf_->max_) {
556  centroids_->max_ = buf_->max_;
557  }
558  if (buf_->min_ < centroids_->min_) {
559  centroids_->min_ = buf_->min_;
560  }
561 }
562 
563 // Assume curr_centroid_ is fully merged.
564 template <typename RealType, typename IndexType>
566  prefix_sum_ += curr_centroid_->currCount();
567  setCurrCentroid();
568 }
569 
570 template <typename RealType, typename IndexType>
572  if ((curr_centroid_ = getNextCentroid())) {
573  curr_centroid_->moveNextToCurrent();
574  }
575 }
576 
577 // class TDigest<>
578 
579 template <typename RealType, typename IndexType>
581  if (buf_.sums_.full()) {
582  mergeBuffer();
583  }
584  buf_.sums_.push_back(value);
585  buf_.counts_.push_back(1);
586 }
587 
588 // Assumes buf_ is allocated iff centroids_ is allocated.
589 template <typename RealType, typename IndexType>
591  if (buf_.capacity() == 0) {
592  auto* p0 = simple_allocator_->allocate(buf_allocate_ * sizeof(RealType));
593  auto* p1 = simple_allocator_->allocate(buf_allocate_ * sizeof(IndexType));
595  VectorView<RealType>(reinterpret_cast<RealType*>(p0), 0, buf_allocate_),
596  VectorView<IndexType>(reinterpret_cast<IndexType*>(p1), 0, buf_allocate_));
597  p0 = simple_allocator_->allocate(centroids_allocate_ * sizeof(RealType));
598  p1 = simple_allocator_->allocate(centroids_allocate_ * sizeof(IndexType));
599  centroids_ = Centroids<RealType, IndexType>(
600  VectorView<RealType>(reinterpret_cast<RealType*>(p0), 0, centroids_allocate_),
601  VectorView<IndexType>(reinterpret_cast<IndexType*>(p1), 0, centroids_allocate_));
602  }
603 }
604 
605 // Return total_weight * (f_inv(f(q_{i-1})+1) - q_{i-1})
606 template <typename RealType, typename IndexType>
607 DEVICE IndexType
609  IndexType const total_weight,
610  RealType const c) {
611  IndexType const max_bins = centroids_.capacity(); // "compression parameter" delta
612  if (total_weight <= max_bins) {
613  return 0;
614  } else if (use_linear_scaling_function_) {
615  // Whitepaper scaling function k=0.
616  return 2 * total_weight / max_bins;
617  } else {
618  // Whitepaper scaling function k=1.
619  RealType const x = 2.0 * sum / total_weight - 1; // = 2 q_{i-1} - 1
620  RealType const f_inv = 0.5 + 0.5 * std::sin(c + std::asin(x));
621  constexpr RealType eps = 1e-5; // round-off epsilon for floor().
622  IndexType const dsum = static_cast<IndexType>(total_weight * f_inv + eps);
623  return dsum < sum ? 0 : dsum - sum;
624  }
625 }
626 
627 // Assumes buf_ consists only of singletons.
628 template <typename RealType, typename IndexType>
630  if (buf_.size()) {
631  gpu_enabled::sort(buf_.sums_.begin(), buf_.sums_.end());
632  buf_.min_ = buf_.sums_.front();
633  buf_.max_ = buf_.sums_.back();
634  mergeCentroids(buf_);
635  }
636 }
637 
638 template <typename RealType, typename IndexType>
640  IndexType* counts,
641  IndexType size) {
642  if (size) {
643  if (buf_.capacity() == 0) {
644  buf_ = Centroids<RealType, IndexType>(sums, counts, size); // Set capacity and size
645  } else {
646  buf_.sums_.set(sums, size); // Does not change capacity
647  buf_.counts_.set(counts, size);
648  }
649  gpu_enabled::fill(buf_.counts_.begin(), buf_.counts_.end(), IndexType(1));
650  buf_.min_ = buf_.sums_.front();
651  buf_.max_ = buf_.sums_.back();
652  mergeCentroids(buf_);
653  }
654 }
655 
656 // Require:
657 // * buf centroids are not empty and sorted by (mean, -count)
658 // * buf.min_ and buf.max_ are correctly set.
659 // During filling stage, buf=buf_.
660 // During reduction, buf_=centroids_[i]
661 template <typename RealType, typename IndexType>
664  constexpr RealType two_pi = 6.283185307179586476925286766559005768e+00;
665  // Hoisted constant used in maxCardinality().
666  RealType const c = two_pi / centroids_.capacity();
667  // Loop over sorted sequence of buf and centroids_.
668  // Some latter centroids may be merged into the current centroid, so the number
669  // of iterations is only at most equal to the number of initial centroids.
671  for (CM cm(&buf, &centroids_, forward_); cm.hasNext(); cm.next()) {
672  // cm.prefixSum() == 0 on first iteration.
673  // Max cardinality for current centroid to be fully merged based on scaling function.
674  IndexType const max_cardinality = maxCardinality(cm.prefixSum(), cm.totalWeight(), c);
675  cm.merge(max_cardinality);
676  }
677  // Combine sorted centroids buf[0..curr_idx_] + centroids_[0..curr_idx_] if forward
678  centroids_.appendAndSortCurrent(buf);
679  buf.clear();
680  forward_ ^= true; // alternate merge direction on each call
681 }
682 
683 namespace {
684 template <typename CountsIterator>
685 DEVICE bool isSingleton(CountsIterator itr) {
686  return *itr == 1;
687 }
688 } // namespace
689 
690 // Assumes x < centroids_.counts_.front().
691 template <typename RealType, typename IndexType>
693  if (x < 1) {
694  return min();
695  } else if (centroids_.size() == 1) {
696  return oneCentroid(x);
697  } else if (centroids_.counts_.front() == 2) {
698  RealType const sum = centroids_.sums_.front();
699  return x == 1 ? 0.5 * sum : sum - min();
700  } else {
701  RealType const count = centroids_.counts_.front();
702  RealType const dx = x - RealType(0.5) * (1 + count);
703  RealType const mean = (centroids_.sums_.front() - min()) / (count - 1);
704  return mean + slope(0, 0 < dx) * dx;
705  }
706 }
707 
708 // x is between first and last centroids.
709 template <typename RealType, typename IndexType>
710 DEVICE RealType
712  IndexType const idx1,
713  IndexType const prefix_sum) {
714  if (isSingleton(centroids_.counts_.begin() + idx1)) {
715  RealType const sum1 = centroids_.sums_[idx1];
716  if (x == prefix_sum - centroids_.counts_[idx1]) {
717  if (isSingleton(centroids_.counts_.begin() + idx1 - 1)) {
718  return 0.5 * (centroids_.sums_[idx1 - 1] + sum1);
719  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
720  return 0.5 * (centroids_.sums_[idx1 - 1] - min() + sum1);
721  }
722  }
723  return sum1;
724  } else {
725  RealType const dx = x + RealType(0.5) * centroids_.counts_[idx1] - prefix_sum;
726  IndexType const idx2 = idx1 + 2 * (0 < dx) - 1;
727  return centroids_.mean(idx1) + slope(idx1, idx2) * dx;
728  }
729 }
730 
731 // Assumes N - centroids_.counts_.back() <= x < N, and there is more than 1 centroid.
732 template <typename RealType, typename IndexType>
734  IndexType const N) {
735  if (N - 1 < x) {
736  return max();
737  }
738  IndexType const idx1 = centroids_.size() - 1;
739  RealType const sum1 = centroids_.sums_[idx1];
740  IndexType const count1 = centroids_.counts_[idx1];
741  if (count1 == 1) { // => x == N - 1
742  if (isSingleton(centroids_.counts_.begin() + (idx1 - 1))) {
743  return 0.5 * (centroids_.sums_[idx1 - 1] + sum1);
744  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
745  return 0.5 * (centroids_.sums_[idx1 - 1] - min() + sum1);
746  } else {
747  return sum1;
748  }
749  } else if (count1 == 2) { // => 3 <= N
750  if (x == N - 1) {
751  return 0.5 * sum1;
752  } else if (x == N - 2) {
753  RealType const sum2 = centroids_.sums_[idx1 - 1];
754  if (isSingleton(centroids_.counts_.begin() + (idx1 - 1))) {
755  return 0.5 * (sum2 + sum1 - max());
756  } else if (idx1 == 1 && centroids_.counts_[0] == 2) {
757  return 0.5 * (sum2 - min() + sum1 - max());
758  }
759  }
760  return sum1 - max();
761  } else { // => 3 <= count1
762  RealType const dx = x + RealType(0.5) * (count1 + 1) - N;
763  RealType const mean = (sum1 - max()) / (count1 - 1);
764  return mean + slope(idx1, idx1 - (dx < 0)) * dx;
765  }
766 }
767 
768 // Assumes there is only 1 centroid, and 1 <= x < centroids_.counts_.front().
769 template <typename RealType, typename IndexType>
771  IndexType const N = centroids_.counts_.front();
772  if (N - 1 < x) { // includes case N == 1
773  return max();
774  } else if (N == 2) { // x == 1
775  return 0.5 * centroids_.sums_.front();
776  } else if (N == 3) { // 1 <= x <= 2
777  if (x == 2) {
778  return 0.5 * (centroids_.sums_.front() - min());
779  } else {
780  RealType const s = centroids_.sums_.front() - max();
781  return x == 1 ? 0.5 * s : s - min();
782  }
783  } else { // 3 < N
784  RealType const dx = x - RealType(0.5) * N;
785  RealType const mean = (centroids_.sums_.front() - (min() + max())) / (N - 2);
786  RealType const slope = 2 * (0 < dx ? max() - mean : mean - min()) / (N - 2);
787  return mean + slope * dx;
788  }
789 }
790 
791 // No need to calculate entire partial_sum unless multiple calls to quantile() are made.
792 template <typename RealType, typename IndexType>
793 DEVICE RealType TDigest<RealType, IndexType>::quantile(IndexType* buf, RealType const q) {
794  if (centroids_.size()) {
795  VectorView<IndexType> partial_sum(buf, centroids_.size(), centroids_.size());
797  centroids_.counts_.begin(), centroids_.counts_.end(), partial_sum.begin());
798  IndexType const N = partial_sum.back();
799  RealType const x = q * N;
800  auto const it1 = gpu_enabled::upper_bound(partial_sum.begin(), partial_sum.end(), x);
801  if (it1 == partial_sum.begin()) {
802  return firstCentroid(x);
803  } else if (it1 == partial_sum.end()) { // <==> 1 <= q
804  return max();
805  } else if (it1 + 1 == partial_sum.end()) {
806  return lastCentroid(x, N);
807  } else {
808  return interiorCentroid(x, it1 - partial_sum.begin(), *it1);
809  }
810  } else {
811  return centroids_.nan;
812  }
813 }
814 
815 // Requirement: 1 < M and 0 <= idx1, idx2 < M where M = Number of centroids.
816 // Return slope of line segment connecting idx1 and idx2.
817 // If equal then assume it is the section contained within an extrema.
818 // Centroid for idx1 is not a singleton, but idx2 may be.
819 template <typename RealType, typename IndexType>
820 DEVICE RealType TDigest<RealType, IndexType>::slope(IndexType idx1, IndexType idx2) {
821  IndexType const M = centroids_.size();
822  if (idx1 == idx2) { // Line segment is contained in either the first or last centroid.
823  RealType const n = static_cast<RealType>(centroids_.counts_[idx1]);
824  RealType const s = centroids_.sums_[idx1];
825  return idx1 == 0 ? 2 * (s - n * min()) / ((n - 1) * (n - 1))
826  : 2 * (n * max() - s) / ((n - 1) * (n - 1));
827  } else {
828  bool const min1 = idx1 == 0; // idx1 is the min centroid
829  bool const max1 = idx1 == M - 1; // idx1 is the max centroid
830  bool const min2 = idx2 == 0; // idx2 is the min centroid
831  bool const max2 = idx2 == M - 1; // idx2 is the max centroid
832  RealType const n1 = static_cast<RealType>(centroids_.counts_[idx1] - min1 - max1);
833  RealType const s1 = centroids_.sums_[idx1] - (min1 ? min() : max1 ? max() : 0);
834  RealType const s2 = centroids_.sums_[idx2] - (min2 ? min() : max2 ? max() : 0);
835  if (isSingleton(centroids_.counts_.begin() + idx2)) {
836  return (idx1 < idx2 ? 2 : -2) * (n1 * s2 - s1) / (n1 * n1);
837  } else {
838  RealType const n2 = static_cast<RealType>(centroids_.counts_[idx2] - min2 - max2);
839  return (idx1 < idx2 ? 2 : -2) * (n1 * s2 - n2 * s1) / (n1 * n2 * (n1 + n2));
840  }
841  }
842 }
843 
844 } // namespace detail
845 
847 
848 } // namespace quantile
DEVICE auto upper_bound(ARGS &&...args)
Definition: gpu_enabled.h:123
static constexpr RealType infinity
Definition: quantile.h:62
DEVICE void push_back(RealType const value, RealType const count)
Definition: quantile.h:114
DEVICE void push_back(T const &value)
Definition: VectorView.h:74
std::vector< RealType > sums_
Definition: quantile.h:170
DEVICE size_type capacity() const
Definition: VectorView.h:61
Centroids< RealType, IndexType > * buf_
Definition: quantile.h:135
DEVICE void skipSubsequent(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:506
std::ostream & operator<<(std::ostream &out, Centroids< RealType, IndexType > const &centroids)
Definition: quantile.h:381
DEVICE RealType quantile(RealType const q)
Definition: quantile.h:258
bool const use_linear_scaling_function_
Definition: quantile.h:191
CentroidsMemory(size_t const size)
Definition: quantile.h:174
DEVICE RealType mean(IndexType const i) const
Definition: quantile.h:94
DEVICE bool index(Centroids< RealType, IndexType > *centroid) const
Definition: quantile.h:477
DEVICE void resetIndices(bool const forward)
Definition: quantile.h:366
DEVICE Centroids< RealType, IndexType > & centroids()
Definition: quantile.h:235
DEVICE void moveNextToCurrent()
Definition: quantile.h:356
DEVICE void add(RealType value)
Definition: quantile.h:580
string name
Definition: setup.in.py:72
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
DEVICE void merged(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:484
DEVICE bool mergeIfFits(Centroids &centroid, IndexType const max_count)
Definition: quantile.h:344
DEVICE bool hasNext() const
Definition: quantile.h:92
DEVICE TDigest(Memory &mem)
Definition: quantile.h:220
DEVICE void mergeBuffer()
Definition: quantile.h:629
DEVICE IndexType totalWeight() const
Definition: quantile.h:286
VectorView< RealType > sums()
Definition: quantile.h:177
DEVICE T * data()
Definition: VectorView.h:65
VectorView< RealType > sums_
Definition: quantile.h:60
VectorView< IndexType > counts_
Definition: quantile.h:61
std::vector< IndexType > counts_
Definition: quantile.h:171
DEVICE void clear()
Definition: VectorView.h:64
DEVICE RealType lastCentroid(RealType const x, IndexType const N)
Definition: quantile.h:733
#define DEVICE
constexpr double a
Definition: Utm.h:33
DEVICE RealType currMean() const
Definition: quantile.h:88
DEVICE RealType quantile()
Definition: quantile.h:263
DEVICE void setCentroids(Memory &mem)
Definition: quantile.h:273
DEVICE RealType oneCentroid(RealType const x)
Definition: quantile.h:770
DEVICE size_type size() const
Definition: VectorView.h:84
DEVICE TDigest(RealType q, SimpleAllocator *simple_allocator, IndexType buf_allocate, IndexType centroids_allocate)
Definition: quantile.h:225
Centroids< RealType, IndexType > * centroids_
Definition: quantile.h:136
DEVICE RealType firstCentroid(RealType const x)
Definition: quantile.h:692
DEVICE RealType max() const
Definition: quantile.h:196
DEVICE void fill(ARGS &&...args)
Definition: gpu_enabled.h:60
DEVICE void set(T *data, size_type const size)
Definition: VectorView.h:80
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
int count
DEVICE Centroids(VectorView< RealType > sums, VectorView< IndexType > counts)
Definition: quantile.h:72
DEVICE T * begin() const
Definition: VectorView.h:60
DEVICE CentroidsMerger(Centroids< RealType, IndexType > *buf, Centroids< RealType, IndexType > *centroids, bool const forward)
Definition: quantile.h:400
DEVICE void mergeTDigest(TDigest &t_digest)
Definition: quantile.h:248
DEVICE IndexType nextCount() const
Definition: quantile.h:101
DEVICE void partial_sum(ARGS &&...args)
Definition: gpu_enabled.h:87
DEVICE void setCentroids(VectorView< RealType > const sums, VectorView< IndexType > const counts)
Definition: quantile.h:279
DEVICE RealType interiorCentroid(RealType const x, IndexType const idx1, IndexType const prefix_sum)
Definition: quantile.h:711
DEVICE auto accumulate(ARGS &&...args)
Definition: gpu_enabled.h:42
Centroids< RealType, IndexType > buf_
Definition: quantile.h:185
IndexType const buf_allocate_
Definition: quantile.h:193
DEVICE RealType nextSum() const
Definition: quantile.h:103
std::optional< RealType > const q_
Definition: quantile.h:190
DEVICE void skipFirst(Centroids< RealType, IndexType > *next_centroid)
Definition: quantile.h:500
DEVICE void allocate()
Definition: quantile.h:590
static DEVICE void shiftRange(T *const begin, IndexType skipped, IndexType const merged, int const inc)
Definition: quantile.h:457
DEVICE void setBuffer(Memory &mem)
Definition: quantile.h:266
DEVICE bool operator()(Value const &a, Value const &b) const
Definition: quantile.h:300
static constexpr RealType nan
Definition: quantile.h:63
DEVICE Centroids< RealType, IndexType > * getNextCentroid() const
Definition: quantile.h:416
DEVICE IndexType prefixSum() const
Definition: quantile.h:161
DEVICE Centroids(RealType *sums, IndexType *counts, IndexType const size)
Definition: quantile.h:69
SimpleAllocator *const simple_allocator_
Definition: quantile.h:192
DEVICE void merge(IndexType const max_count)
Definition: quantile.h:525
DEVICE bool operator<(Centroids const &b) const
Definition: quantile.h:107
DEVICE bool hasNext() const
Definition: quantile.h:153
IndexType const centroids_allocate_
Definition: quantile.h:194
Centroids< RealType, IndexType > * curr_centroid_
Definition: quantile.h:137
DEVICE IndexType maxCardinality(IndexType const sum, IndexType const total_weight, RealType const c)
Definition: quantile.h:608
DEVICE size_t size() const
Definition: quantile.h:121
DEVICE IndexType currCount() const
Definition: quantile.h:86
DEVICE IndexType totalWeight() const
Definition: quantile.h:123
DEVICE bool isDifferentMean(Centroids< RealType, IndexType > *next_centroid) const
Definition: quantile.h:480
DEVICE T0 value0() const
Definition: DoubleSort.h:87
DEVICE RealType slope(IndexType const idx1, IndexType const idx2)
Definition: quantile.h:820
constexpr unsigned N
Definition: Utm.h:111
DEVICE IndexType capacity() const
Definition: quantile.h:77
DEVICE IndexType totalWeight() const
Definition: quantile.h:163
DEVICE T1 value1() const
Definition: DoubleSort.h:88
DEVICE void mergeCentroids(Centroids< RealType, IndexType > &)
Definition: quantile.h:662
constexpr double n
Definition: Utm.h:39
DEVICE void reverse(ARGS &&...args)
Definition: gpu_enabled.h:96
DEVICE RealType min() const
Definition: quantile.h:197
DEVICE bool isSingleton(CountsIterator itr)
Definition: quantile.h:685
DEVICE void appendAndSortCurrent(Centroids &buff)
Definition: quantile.h:310
DEVICE bool hasCurr() const
Definition: quantile.h:90
DEVICE void mergeSorted(RealType *sums, IndexType *counts, IndexType size)
Definition: quantile.h:639
VectorView< IndexType > counts()
Definition: quantile.h:178
Centroids< RealType, IndexType > centroids_
Definition: quantile.h:186
DEVICE T * end() const
Definition: VectorView.h:68