[go: up one dir, main page]

average/moments/
skewness.rs

1use num_traits::Float;
2
3/// Estimate the arithmetic mean, the variance and the skewness of a sequence of
4/// numbers ("population").
5///
6/// This can be used to estimate the standard error of the mean.
7#[derive(Debug, Clone)]
8#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
9pub struct Skewness {
10    /// Estimator of mean and variance.
11    avg: MeanWithError,
12    /// Intermediate sum of cubes for calculating the skewness.
13    sum_3: f64,
14}
15
16impl Skewness {
17    /// Create a new skewness estimator.
18    #[inline]
19    pub fn new() -> Skewness {
20        Skewness {
21            avg: MeanWithError::new(),
22            sum_3: 0.,
23        }
24    }
25
26    /// Increment the sample size.
27    ///
28    /// This does not update anything else.
29    #[inline]
30    fn increment(&mut self) {
31        self.avg.increment();
32    }
33
34    /// Add an observation given an already calculated difference from the mean
35    /// divided by the number of samples, assuming the inner count of the sample
36    /// size was already updated.
37    ///
38    /// This is useful for avoiding unnecessary divisions in the inner loop.
39    #[inline]
40    fn add_inner(&mut self, delta: f64, delta_n: f64) {
41        // This algorithm was suggested by Terriberry.
42        //
43        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
44        let n = self.len().to_f64().unwrap();
45        let term = delta * delta_n * (n - 1.);
46        self.sum_3 += term * delta_n * (n - 2.)
47            - 3.*delta_n * self.avg.sum_2;
48        self.avg.add_inner(delta_n);
49    }
50
51    /// Determine whether the sample is empty.
52    #[inline]
53    pub fn is_empty(&self) -> bool {
54        self.avg.is_empty()
55    }
56
57    /// Estimate the mean of the population.
58    ///
59    /// Returns NaN for an empty sample.
60    #[inline]
61    pub fn mean(&self) -> f64 {
62        self.avg.mean()
63    }
64
65    /// Return the sample size.
66    #[inline]
67    pub fn len(&self) -> u64 {
68        self.avg.len()
69    }
70
71    /// Calculate the sample variance.
72    ///
73    /// This is an unbiased estimator of the variance of the population.
74    /// 
75    /// Returns NaN for samples of size 1 or less.
76    #[inline]
77    pub fn sample_variance(&self) -> f64 {
78        self.avg.sample_variance()
79    }
80
81    /// Calculate the population variance of the sample.
82    ///
83    /// This is a biased estimator of the variance of the population.
84    /// 
85    /// Returns NaN for an empty sample.
86    #[inline]
87    pub fn population_variance(&self) -> f64 {
88        self.avg.population_variance()
89    }
90
91    /// Estimate the standard error of the mean of the population.
92    #[inline]
93    pub fn error_mean(&self) -> f64 {
94        self.avg.error()
95    }
96
97    /// Estimate the skewness of the population.
98    /// 
99    /// Returns NaN for an empty sample.
100    #[inline]
101    pub fn skewness(&self) -> f64 {
102        if self.is_empty() {
103            return f64::NAN;
104        }
105        if self.sum_3 == 0. {
106            return 0.;
107        }
108        let n = self.len().to_f64().unwrap();
109        let sum_2 = self.avg.sum_2;
110        debug_assert_ne!(sum_2, 0.);
111        Float::sqrt(n) * self.sum_3 / Float::sqrt(sum_2*sum_2*sum_2)
112    }
113}
114
115impl Default for Skewness {
116    fn default() -> Skewness {
117        Skewness::new()
118    }
119}
120
121impl Estimate for Skewness {
122    #[inline]
123    fn add(&mut self, x: f64) {
124        let delta = x - self.avg.avg.avg;
125        self.increment();
126        let n = self.len().to_f64().unwrap();
127        self.add_inner(delta, delta/n);
128    }
129
130    #[inline]
131    fn estimate(&self) -> f64 {
132        self.skewness()
133    }
134}
135
136impl Merge for Skewness {
137    #[inline]
138    fn merge(&mut self, other: &Skewness) {
139        if other.is_empty() {
140            return;
141        }
142        if self.is_empty() {
143            *self = other.clone();
144            return;
145        }
146        let len_self = self.len().to_f64().unwrap();
147        let len_other = other.len().to_f64().unwrap();
148        let len_total = len_self + len_other;
149        let delta = other.mean() - self.mean();
150        let delta_n = delta / len_total;
151        self.sum_3 += other.sum_3
152            + delta*delta_n*delta_n * len_self*len_other*(len_self - len_other)
153            + 3.*delta_n * (len_self * other.avg.sum_2 - len_other * self.avg.sum_2);
154        self.avg.merge(&other.avg);
155    }
156}
157
158impl_from_iterator!(Skewness);
159impl_from_par_iterator!(Skewness);
160impl_extend!(Skewness);