QuantLib: a free/open-source library for quantitative finance
Fully annotated sources - version 1.32
Loading...
Searching...
No Matches
histogram.cpp
1/* -*- mode: c++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2
3/*
4 Copyright (C) 2007 Gang Liang
5
6 This file is part of QuantLib, a free-software/open-source library
7 for financial quantitative analysts and developers - http://quantlib.org/
8
9 QuantLib is free software: you can redistribute it and/or modify it
10 under the terms of the QuantLib license. You should have received a
11 copy of the license along with this program; if not, please email
12 <quantlib-dev@lists.sf.net>. The license is also available online at
13 <http://quantlib.org/license.shtml>.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the license for more details.
18*/
19
20#include <ql/math/statistics/histogram.hpp>
21#include <ql/math/statistics/incrementalstatistics.hpp>
22#include <ql/math/comparison.hpp>
23#include <algorithm>
24
25namespace QuantLib {
26
27 namespace {
28
29 /* The discontinuous quantiles use the method (type 8) as
30 recommended by Hyndman and Fan (1996). The resulting
31 quantile estimates are approximately median-unbiased
32 regardless of the distribution of 'samples'.
33
34 If quantile function is called multiple times for the same
35 dataset, it is recommended to pre-sort the sample vector.
36 */
37 Real quantile(const std::vector<Real>& samples, Real prob) {
38 Size nsample = samples.size();
39 QL_REQUIRE(prob >= 0.0 && prob <= 1.0,
40 "Probability has to be in [0,1].");
41 QL_REQUIRE(nsample > 0, "The sample size has to be positive." );
42
43 if (nsample == 1)
44 return samples[0];
45
46 // two special cases: close to boundaries
47 const Real a = 1. / 3, b = 2*a / (nsample+a);
48 if (prob < b)
49 return *std::min_element(samples.begin(), samples.end());
50 else if (prob > 1-b)
51 return *std::max_element(samples.begin(), samples.end());
52
53 // general situation: middle region and nsample >= 2
54 Size index = static_cast<Size>(std::floor((nsample+a)*prob+a));
55 std::vector<Real> sorted(index+1);
56 std::partial_sort_copy(samples.begin(), samples.end(),
57 sorted.begin(), sorted.end());
58
59 // use "index & index+1"th elements to interpolate the quantile
60 Real weight = nsample*prob + a - index;
61 return (1-weight) * sorted[index-1] + weight * sorted[index];
62 }
63
64 }
65
66
68 return bins_;
69 }
70
71 const std::vector<Real>& Histogram::breaks() const {
72 return breaks_;
73 }
74
76 return algorithm_;
77 }
78
79 bool Histogram::empty() const {
80 return bins_ == 0;
81 }
82
84 #if defined(QL_EXTRA_SAFETY_CHECKS)
85 return counts_.at(i);
86 #else
87 return counts_[i];
88 #endif
89 }
90
92 #if defined(QL_EXTRA_SAFETY_CHECKS)
93 return frequency_.at(i);
94 #else
95 return frequency_[i];
96 #endif
97 }
98
100 QL_REQUIRE(!data_.empty(), "no data given");
101
102 Real min = *std::min_element(data_.begin(), data_.end());
103 Real max = *std::max_element(data_.begin(), data_.end());
104
105 // calculate number of bins if necessary
106 if (bins_ == Null<Size>()) {
107 switch (algorithm_) {
108 case Sturges: {
109 bins_ = static_cast<Size>(
110 std::ceil(std::log(static_cast<Real>(data_.size()))
111 /std::log(2.0) + 1));
112 break;
113 }
114 case FD: {
115 Real r1 = quantile(data_, 0.25);
116 Real r2 = quantile(data_, 0.75);
117 Real h = 2.0 * (r2-r1) * std::pow(static_cast<Real>(data_.size()), -1.0/3.0);
118 bins_ = static_cast<Size>(std::ceil((max-min)/h));
119 break;
120 }
121 case Scott: {
122 IncrementalStatistics summary;
123 summary.addSequence(data_.begin(), data_.end());
124 Real variance = summary.variance();
125 Real h = 3.5 * std::sqrt(variance)
126 * std::pow(static_cast<Real>(data_.size()), -1.0/3.0);
127 bins_ = static_cast<Size>(std::ceil((max-min)/h));
128 break;
129 }
130 case None:
131 QL_FAIL("a bin-partition algorithm is required");
132 default:
133 QL_FAIL("unknown bin-partition algorithm");
134 };
135 bins_ = std::max<Size>(bins_,1);
136 }
137
138 if (breaks_.empty()) {
139 // set breaks if not provided
140 breaks_.resize(bins_-1);
141
142 // ensure breaks_ evenly span over the range of data_
143 // TODO: borrow the idea of pretty in R.
144 Real h = (max-min)/bins_;
145 for (Size i=0; i<breaks_.size(); ++i) {
146 breaks_[i] = min + (i+1)*h;
147 }
148 } else {
149 // or ensure they're sorted if given
150 std::sort(breaks_.begin(), breaks_.end());
151 auto end = std::unique(breaks_.begin(), breaks_.end(),
152 static_cast<bool (*)(Real, Real)>(close_enough));
153 breaks_.resize(end - breaks_.begin());
154 }
155
156 // finally, calculate counts and frequencies
157 counts_.resize(bins_);
158 std::fill(counts_.begin(), counts_.end(), 0);
159
160 for (Real p : data_) {
161 bool processed = false;
162 for (Size i=0; i<breaks_.size(); ++i) {
163 if (p < breaks_[i]) {
164 ++counts_[i];
165 processed = true;
166 break;
167 }
168 }
169 if (!processed)
170 ++counts_[bins_-1];
171 }
172
173 frequency_.resize(bins_);
174
175 Size totalCounts = data_.size();
176 for (Size i=0; i<bins_; ++i)
177 frequency_[i] = static_cast<Real>(counts_[i])/totalCounts;
178 }
179
180}
Size bins() const
Definition: histogram.cpp:67
std::vector< Real > breaks_
Definition: histogram.hpp:84
Algorithm algorithm_
Definition: histogram.hpp:83
std::vector< Real > frequency_
Definition: histogram.hpp:86
std::vector< Size > counts_
Definition: histogram.hpp:85
Algorithm algorithm() const
Definition: histogram.cpp:75
bool empty() const
Definition: histogram.cpp:79
Real frequency(Size i) const
Definition: histogram.cpp:91
Size counts(Size i) const
Definition: histogram.cpp:83
const std::vector< Real > & breaks() const
Definition: histogram.cpp:71
std::vector< Real > data_
Definition: histogram.hpp:81
Statistics tool based on incremental accumulation.
void addSequence(DataIterator begin, DataIterator end)
adds a sequence of data to the set, with default weight
template class providing a null value for a given type.
Definition: null.hpp:76
QL_REAL Real
real number
Definition: types.hpp:50
std::size_t Size
size of a container
Definition: types.hpp:58
Definition: any.hpp:35
bool close_enough(const Quantity &m1, const Quantity &m2, Size n)
Definition: quantity.cpp:182