
stat/lib.rs

// lib.rs
//
// Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007 Jim Davies, Brian Gough
// Copyright (C) 2016 G.vd.Schoot
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or (at
// your option) any later version.
//
// This program is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
//

pub mod types;
use types::F64;

// mean

// mean calculates the arithmetic mean using the recurrence relation
// mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n, which avoids accumulating
// a large intermediate sum
pub fn mean<T: F64>(data: &[T]) -> f64 {
    let mut mean = 0.0;
    for (i, val) in data.iter().enumerate() {
        mean += (val.f64() - mean) / (i + 1) as f64;
    }
    mean
}
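
// A minimal usage sketch, not part of the original source: it assumes the
// `types::F64` trait is implemented for plain `f64`, so `&[f64]` slices can be
// passed directly. It checks that the running-mean recurrence matches the
// naive sum-then-divide result.
#[cfg(test)]
mod mean_example {
    use super::*;

    #[test]
    fn recurrence_matches_naive_mean() {
        let data: &[f64] = &[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let naive = data.iter().sum::<f64>() / data.len() as f64;
        assert!((mean(data) - naive).abs() < 1e-12);
    }
}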

// absdev

pub fn absdev<T: F64>(data: &[T]) -> f64 {
    absdev_mean(data, mean(data))
}

// absdev_mean finds the absolute deviation of the dataset about the given mean
pub fn absdev_mean<T: F64>(data: &[T], mean: f64) -> f64 {
    let mut sum = 0.0;
    // the sum of the absolute deviations
    for val in data {
        sum += (val.f64() - mean).abs();
    }
    sum / data.len() as f64
}

// covariance

// takes two datasets and calculates their covariance about the given means,
// without Bessel's correction (covariance_mean rescales by n / (n - 1))
fn covariance_nonpub<T: F64>(data1: &[T], data2: &[T], mean1: f64, mean2: f64) -> f64 {
    let mut res = 0.0;
    // running mean of the cross products of the deviations
    for i in 0..data1.len() {
        let delta1 = data1[i].f64() - mean1;
        let delta2 = data2[i].f64() - mean2;
        res += (delta1 * delta2 - res) / (i + 1) as f64;
    }
    res
}

pub fn covariance_mean<T: F64>(data1: &[T], data2: &[T], mean1: f64, mean2: f64) -> f64 {
    let n = data1.len();
    let covariance = covariance_nonpub(data1, data2, mean1, mean2);
    covariance * (n as f64) / (n - 1) as f64
}

pub fn covariance<T: F64>(data1: &[T], data2: &[T]) -> f64 {
    let mean1 = mean(data1);
    let mean2 = mean(data2);
    covariance_mean(data1, data2, mean1, mean2)
}
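
// A hedged check, not part of the original source (and again assuming
// `types::F64` is implemented for `f64`): the Bessel-corrected covariance
// should agree with the direct two-pass formula
// sum((x_i - mean_x) * (y_i - mean_y)) / (n - 1).
#[cfg(test)]
mod covariance_example {
    use super::*;

    #[test]
    fn matches_two_pass_formula() {
        let xs: &[f64] = &[1.0, 2.0, 3.0, 4.0, 5.0];
        let ys: &[f64] = &[2.0, 4.0, 6.0, 8.0, 10.0];
        let mx = xs.iter().sum::<f64>() / xs.len() as f64;
        let my = ys.iter().sum::<f64>() / ys.len() as f64;
        let direct = xs.iter().zip(ys).map(|(x, y)| (x - mx) * (y - my)).sum::<f64>()
            / (xs.len() - 1) as f64;
        assert!((covariance(xs, ys) - direct).abs() < 1e-12);
    }
}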

// correlation()
// Calculate Pearson correlation = cov(X, Y) / (sigma_X * sigma_Y)
// This routine efficiently computes the correlation in one pass of the
// data and makes use of the algorithm described in:
//
// B. P. Welford, "Note on a Method for Calculating Corrected Sums of
// Squares and Products", Technometrics, Vol 4, No 3, 1962.
//
// This paper derives a numerically stable recurrence to compute a sum
// of products
//
// S = sum_{i=1..N} [ (x_i - mu_x) * (y_i - mu_y) ]
//
// with the relation
//
// S_n = S_{n-1} + ((n-1)/n) * (x_n - mu_x_{n-1}) * (y_n - mu_y_{n-1})
//
pub fn correlation<T: F64>(data1: &[T], data2: &[T]) -> f64 {
    let mut sum_xsq = 0.0;
    let mut sum_ysq = 0.0;
    let mut sum_cross = 0.0;

    // Compute:
    // sum_xsq = Sum [ (x_i - mu_x)^2 ],
    // sum_ysq = Sum [ (y_i - mu_y)^2 ] and
    // sum_cross = Sum [ (x_i - mu_x) * (y_i - mu_y) ]
    // using the above relation from Welford's paper

    let mut mean_x = data1[0].f64();
    let mut mean_y = data2[0].f64();

    for i in 1..data1.len() {
        let ratio = i as f64 / (i + 1) as f64;
        let delta_x = data1[i].f64() - mean_x;
        let delta_y = data2[i].f64() - mean_y;
        sum_xsq += delta_x * delta_x * ratio;
        sum_ysq += delta_y * delta_y * ratio;
        sum_cross += delta_x * delta_y * ratio;
        mean_x += delta_x / (i + 1) as f64;
        mean_y += delta_y / (i + 1) as f64;
    }

    sum_cross / (sum_xsq.sqrt() * sum_ysq.sqrt())
}
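
// A small sanity sketch, not in the original source, assuming `types::F64`
// is implemented for `f64`: for perfectly linearly related data the Pearson
// correlation should come out as 1 (up to rounding), since the one-pass
// Welford recurrence reproduces cov(X, Y) / (sigma_X * sigma_Y).
#[cfg(test)]
mod correlation_example {
    use super::*;

    #[test]
    fn linear_data_has_unit_correlation() {
        let xs: &[f64] = &[1.0, 2.0, 3.0, 4.0, 5.0];
        let ys: &[f64] = &[2.0, 4.0, 6.0, 8.0, 10.0]; // ys = 2 * xs
        assert!((correlation(xs, ys) - 1.0).abs() < 1e-12);
    }
}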

// kurtosis

pub fn kurtosis<T: F64>(data: &[T]) -> f64 {
    let mean = mean(data);
    let est_sd = sd_mean(data, mean);
    kurtosis_main_sd(data, mean, est_sd)
}

pub fn kurtosis_main_sd<T: F64>(data: &[T], mean: f64, sd: f64) -> f64 {
    let mut avg = 0.0;

    // calculate the fourth moment of the deviations, normalized by the sd
    //
    // we use a recurrence relation to update a running value stably, so
    // there are no large sums that could overflow

    for i in 0..data.len() {
        let x = (data[i].f64() - mean) / sd;
        avg += (x * x * x * x - avg) / (i + 1) as f64;
    }
    avg - 3.0 // makes kurtosis zero for a Gaussian
}

// lag-1

pub fn lag1autocorrelation<T: F64>(data: &[T]) -> f64 {
    let mean = mean(data);
    lag1autocorrelation_mean(data, mean)
}

pub fn lag1autocorrelation_mean<T: F64>(data: &[T], mean: f64) -> f64 {
    let mut q = 0.0;
    let mut v = (data[0].f64() - mean) * (data[0].f64() - mean);

    for i in 1..data.len() {
        let delta0 = data[i - 1].f64() - mean;
        let delta1 = data[i].f64() - mean;
        q += (delta0 * delta1 - q) / (i + 1) as f64;
        v += (delta1 * delta1 - v) / (i + 1) as f64;
    }
    q / v // r1
}

// median

// median_from_sorted_data calculates the median of the sorted data.
// Note that the function doesn't check whether the data is actually sorted.
pub fn median_from_sorted_data<T: F64>(sorted_data: &[T]) -> f64 {
    let len = sorted_data.len();
    if len == 0 {
        return 0.0;
    }

    // the emptiness check must come first: `len - 1` would underflow on an empty slice
    let lhs = (len - 1) / 2;
    let rhs = len / 2;

    if lhs == rhs {
        sorted_data[lhs].f64()
    } else {
        (sorted_data[lhs].f64() + sorted_data[rhs].f64()) / 2.0
    }
}
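
// A brief usage sketch, not part of the original source and assuming
// `types::F64` is implemented for `f64`: for an odd-length sorted slice the
// median is the middle element, for an even length it is the average of the
// two middle elements.
#[cfg(test)]
mod median_example {
    use super::*;

    #[test]
    fn odd_and_even_lengths() {
        let odd: &[f64] = &[1.0, 2.0, 3.0, 4.0, 5.0];
        let even: &[f64] = &[1.0, 2.0, 3.0, 4.0];
        assert!((median_from_sorted_data(odd) - 3.0).abs() < 1e-12);
        assert!((median_from_sorted_data(even) - 2.5).abs() < 1e-12);
    }
}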

// minmax

// max finds the first largest member and that member's position within the data
pub fn max<T: F64>(data: &[T]) -> (f64, usize) {
    let mut max = data[0].f64();
    let mut max_index = 0;

    for (i, val) in data.iter().enumerate() {
        let xi = val.f64();

        if xi > max {
            max = xi;
            max_index = i;
        }

        if xi.is_nan() {
            max = xi;
            max_index = i;
            return (max, max_index);
        }
    }
    (max, max_index)
}

// min finds the first smallest member and that member's position within the data
pub fn min<T: F64>(data: &[T]) -> (f64, usize) {
    let mut min = data[0].f64();
    let mut min_index = 0;

    for (i, val) in data.iter().enumerate() {
        let xi = val.f64();

        if xi < min {
            min = xi;
            min_index = i;
        }

        if xi.is_nan() {
            min = xi;
            min_index = i;
            return (min, min_index);
        }
    }
    (min, min_index)
}

// minmax finds the first smallest and largest members and
// those members' positions within the data
pub fn minmax<T: F64>(data: &[T]) -> (f64, u32, f64, u32) {
    let mut min_index: u32 = 0;
    let mut max_index: u32 = 0;
    let mut min = data[0].f64();
    let mut max = data[0].f64();

    for i in 0..data.len() {
        let xi = data[i].f64();

        if xi < min {
            min = xi;
            min_index = i as u32;
        }

        if xi > max {
            max = xi;
            max_index = i as u32;
        }

        if xi.is_nan() {
            min = xi;
            max = xi;
            min_index = i as u32;
            max_index = i as u32;
            break;
        }
    }
    (min, min_index, max, max_index)
}

// pvariance

// p_variance finds the pooled variance of two datasets
pub fn p_variance<T: F64>(data1: &[T], data2: &[T]) -> f64 {
    let n1 = data1.len();
    let n2 = data2.len();

    let var1 = variance(data1);
    let var2 = variance(data2);

    (((n1 - 1) as f64 * var1) + ((n2 - 1) as f64 * var2)) / (n1 + n2 - 2) as f64
}

// quantiles

// quantile_from_sorted_data evaluates the quantile function, also called the percent
// point function or inverse cumulative distribution function, on the sorted data.
// Note that the function doesn't check whether the data is actually sorted.
pub fn quantile_from_sorted_data<T: F64>(sorted_data: &[T], f: f64) -> f64 {
    let n = sorted_data.len() as i32;
    if n == 0 {
        return 0.0;
    }

    let index = f * (n - 1) as f64;
    let lhs = index as i32;
    let delta = index - (lhs as f64);

    if lhs == n - 1 {
        sorted_data[lhs as usize].f64()
    } else {
        (1.0 - delta) * sorted_data[lhs as usize].f64()
            + delta * sorted_data[lhs as usize + 1].f64()
    }
}
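
// A small usage sketch, not in the original source (again assuming `types::F64`
// is implemented for `f64`): the 0.5 quantile reproduces the median, and
// fractional positions are linearly interpolated between neighbouring
// order statistics.
#[cfg(test)]
mod quantile_example {
    use super::*;

    #[test]
    fn half_quantile_is_median_and_interpolation_works() {
        let sorted: &[f64] = &[1.0, 2.0, 3.0, 4.0, 5.0];
        let median = median_from_sorted_data(sorted);
        assert!((quantile_from_sorted_data(sorted, 0.5) - median).abs() < 1e-12);
        // f = 0.9 lands at index 3.6, i.e. 0.4 * 4.0 + 0.6 * 5.0 = 4.6
        assert!((quantile_from_sorted_data(sorted, 0.9) - 4.6).abs() < 1e-9);
    }
}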

// skew

pub fn skew<T: F64>(data: &[T]) -> f64 {
    let mean = mean(data);
    let sd = sd_mean(data, mean);
    skew_mean_sd(data, mean, sd)
}

// skew_mean_sd calculates the skewness of a dataset given its mean and standard deviation
pub fn skew_mean_sd<T: F64>(data: &[T], mean: f64, sd: f64) -> f64 {
    let mut skew = 0.0;
    for (i, val) in data.iter().enumerate() {
        let x = (val.f64() - mean) / sd;
        skew += (x * x * x - skew) / (i + 1) as f64;
    }
    skew
}

// ttest

// t_test runs a t-test between two datasets representing independent
// samples, testing whether the difference between the sample means is
// different from zero.
pub fn t_test<T: F64>(data1: &[T], data2: &[T]) -> f64 {
    let n1 = data1.len() as f64;
    let n2 = data2.len() as f64;

    let mean1 = mean(data1);
    let mean2 = mean(data2);

    let pv = p_variance(data1, data2);

    (mean1 - mean2) / (pv * ((1.0 / n1) + (1.0 / n2))).sqrt()
}
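
// A hedged worked example, not from the original source and assuming
// `types::F64` is implemented for `f64`: with samples [1..5] and [3..7] the
// pooled variance is 2.5, the standard error is sqrt(2.5 * (1/5 + 1/5)) = 1,
// so the t statistic is (3 - 5) / 1 = -2.
#[cfg(test)]
mod t_test_example {
    use super::*;

    #[test]
    fn shifted_sample_gives_minus_two() {
        let a: &[f64] = &[1.0, 2.0, 3.0, 4.0, 5.0];
        let b: &[f64] = &[3.0, 4.0, 5.0, 6.0, 7.0];
        assert!(t_test(a, a).abs() < 1e-12); // identical samples -> t = 0
        assert!((t_test(a, b) + 2.0).abs() < 1e-12);
    }
}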

// variance

fn _variance<T: F64>(data: &[T], mean: f64) -> f64 {
    let mut variance = 0.0;

    // calculate the sum of the squares
    for (i, val) in data.iter().enumerate() {
        let delta = val.f64() - mean;
        // TODO: long double for variance... How to implement in Rust?
        variance += ((delta * delta) - variance) / (i + 1) as f64;
    }
    variance
}

pub fn variance_with_fixed_mean<T: F64>(data: &[T], mean: f64) -> f64 {
    _variance(data, mean)
}

pub fn sd_with_fixed_mean<T: F64>(data: &[T], mean: f64) -> f64 {
    _variance(data, mean).sqrt()
}

pub fn variance_mean<T: F64>(data: &[T], mean: f64) -> f64 {
    let variance = _variance(data, mean);
    variance * (data.len() as f64) / (data.len() - 1) as f64
}

pub fn sd_mean<T: F64>(data: &[T], mean: f64) -> f64 {
    let variance = _variance(data, mean);
    (variance * (data.len() as f64) / (data.len() - 1) as f64).sqrt()
}

pub fn variance<T: F64>(data: &[T]) -> f64 {
    let mean = mean(data);
    variance_mean(data, mean)
}

pub fn sd<T: F64>(data: &[T]) -> f64 {
    let mean = mean(data);
    sd_mean(data, mean)
}
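
// A quick consistency sketch, not part of the original source and assuming
// `types::F64` is implemented for `f64`: the incremental estimate should agree
// with the two-pass sample variance sum((x_i - mean)^2) / (n - 1), and `sd`
// should be its square root.
#[cfg(test)]
mod variance_example {
    use super::*;

    #[test]
    fn matches_two_pass_sample_variance() {
        let data: &[f64] = &[2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let m = data.iter().sum::<f64>() / data.len() as f64;
        let two_pass = data.iter().map(|x| (x - m) * (x - m)).sum::<f64>()
            / (data.len() - 1) as f64;
        assert!((variance(data) - two_pass).abs() < 1e-12);
        assert!((sd(data) - two_pass.sqrt()).abs() < 1e-12);
    }
}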

// tss_mean takes a dataset and finds the sum of squares about the mean
pub fn tss_mean<T: F64>(data: &[T], mean: f64) -> f64 {
    let mut res = 0.0;

    // find the sum of the squares
    for val in data {
        let delta = val.f64() - mean;
        res += delta * delta;
    }
    res
}

pub fn tss<T: F64>(data: &[T]) -> f64 {
    let mean = mean(data);
    tss_mean(data, mean)
}

// wabsdev

pub fn w_absdev<T: F64>(w: &[T], data: &[T]) -> f64 {
    let wmean = w_mean(w, data);
    w_absdev_mean(w, data, wmean)
}

// w_absdev_mean calculates the weighted absolute deviation of a dataset
pub fn w_absdev_mean<T: F64>(w: &[T], data: &[T], wmean: f64) -> f64 {
    let mut wabsdev = 0.0;
    let mut weight = 0.0;

    // calculate the weighted mean of the absolute deviations
    for i in 0..data.len() {
        let wi = w[i].f64();

        if wi > 0.0 {
            let delta = (data[i].f64() - wmean).abs();
            weight += wi;
            wabsdev += (delta - wabsdev) * (wi / weight);
        }
    }
    wabsdev
}

// wkurtosis

pub fn w_kurtosis<T: F64>(w: &[T], data: &[T]) -> f64 {
    let wmean = w_mean(w, data);
    let wsd = w_sd_mean(w, data, wmean);
    w_kurtosis_mean_sd(w, data, wmean, wsd)
}

// w_kurtosis_mean_sd calculates the weighted kurtosis of a dataset given its
// weighted mean and standard deviation
pub fn w_kurtosis_mean_sd<T: F64>(w: &[T], data: &[T], wmean: f64, wsd: f64) -> f64 {
    let mut wavg = 0.0;
    let mut weight = 0.0;

    for (i, val) in data.iter().enumerate() {
        let wi = w[i].f64();

        if wi > 0.0 {
            let x = (val.f64() - wmean) / wsd;
            weight += wi;
            wavg += (x * x * x * x - wavg) * (wi / weight);
        }
    }
    wavg - 3.0 // makes kurtosis zero for a Gaussian
}

// wmean

// w_mean calculates the weighted arithmetic mean of a dataset
pub fn w_mean<T: F64>(w: &[T], data: &[T]) -> f64 {
    let mut wmean = 0.0;
    let mut weight = 0.0;

    for (i, val) in data.iter().enumerate() {
        let wi = w[i].f64();

        if wi > 0.0 {
            weight += wi;
            wmean += (val.f64() - wmean) * (wi / weight);
        }
    }
    wmean
}
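
// A brief sanity sketch, not in the original source and assuming `types::F64`
// is implemented for `f64`: with all weights equal, the weighted mean reduces
// to the ordinary arithmetic mean (only non-positive weights are skipped).
#[cfg(test)]
mod w_mean_example {
    use super::*;

    #[test]
    fn equal_weights_reduce_to_mean() {
        let w: &[f64] = &[1.0, 1.0, 1.0, 1.0, 1.0];
        let data: &[f64] = &[1.0, 2.0, 3.0, 4.0, 5.0];
        assert!((w_mean(w, data) - mean(data)).abs() < 1e-12);
    }
}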

// wskew

pub fn w_skew<T: F64>(w: &[T], data: &[T]) -> f64 {
    let wmean = w_mean(w, data);
    let wsd = w_sd_mean(w, data, wmean);
    w_skew_mean_sd(w, data, wmean, wsd)
}

// w_skew_mean_sd computes the weighted skewness of a dataset
pub fn w_skew_mean_sd<T: F64>(w: &[T], data: &[T], wmean: f64, wsd: f64) -> f64 {
    let mut wskew = 0.0;
    let mut weight = 0.0;

    for (i, val) in data.iter().enumerate() {
        let wi = w[i].f64();

        if wi > 0.0 {
            let x = (val.f64() - wmean) / wsd;
            weight += wi;
            wskew += (x * x * x - wskew) * (wi / weight);
        }
    }
    wskew
}

// wvariance

fn wvariance<T: F64>(w: &[T], data: &[T], wmean: f64) -> f64 {
    let mut weight = 0.0;
    let mut wvariance = 0.0;

    // the weighted mean of the squared deviations
    for (i, val) in w.iter().enumerate() {
        let wi = val.f64();
        if wi > 0.0 {
            let delta = data[i].f64() - wmean;
            weight += wi;
            wvariance += (delta * delta - wvariance) * (wi / weight);
        }
    }
    wvariance
}

// factor computes the correction (sum w)^2 / ((sum w)^2 - sum w^2), the
// weighted analogue of n / (n - 1), applied by w_variance_mean and w_sd_mean
fn factor<T: F64>(w: &[T]) -> f64 {
    let mut a = 0.0;
    let mut b = 0.0;

    // the sum of the weights and the sum of the squared weights
    for val in w {
        let wi = val.f64();
        if wi > 0.0 {
            a += wi;
            b += wi * wi;
        }
    }
    (a * a) / ((a * a) - b)
}

pub fn w_variance_with_fixed_mean<T: F64>(w: &[T], data: &[T], wmean: f64) -> f64 {
    wvariance(w, data, wmean)
}

pub fn wsd_with_fixed_mean<T: F64>(w: &[T], data: &[T], wmean: f64) -> f64 {
    let wvariance = wvariance(w, data, wmean);
    wvariance.sqrt()
}

pub fn w_variance_mean<T: F64>(w: &[T], data: &[T], wmean: f64) -> f64 {
    let variance = wvariance(w, data, wmean);
    let scale = factor(w);

    scale * variance
}

pub fn w_sd_mean<T: F64>(w: &[T], data: &[T], wmean: f64) -> f64 {
    let variance = wvariance(w, data, wmean);
    let scale = factor(w);
    (scale * variance).sqrt()
}

pub fn w_sd<T: F64>(w: &[T], data: &[T]) -> f64 {
    let wmean = w_mean(w, data);
    w_sd_mean(w, data, wmean)
}

pub fn w_variance<T: F64>(w: &[T], data: &[T]) -> f64 {
    let wmean = w_mean(w, data);
    w_variance_mean(w, data, wmean)
}
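
// A hedged consistency sketch, not part of the original source and assuming
// `types::F64` is implemented for `f64`: with equal weights the correction
// factor becomes n / (n - 1), so the weighted variance and standard deviation
// reduce to their unweighted counterparts.
#[cfg(test)]
mod w_variance_example {
    use super::*;

    #[test]
    fn equal_weights_reduce_to_variance_and_sd() {
        let w: &[f64] = &[1.0, 1.0, 1.0, 1.0, 1.0];
        let data: &[f64] = &[1.0, 2.0, 3.0, 4.0, 5.0];
        assert!((w_variance(w, data) - variance(data)).abs() < 1e-12);
        assert!((w_sd(w, data) - sd(data)).abs() < 1e-12);
    }
}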

// w_tss_mean takes a dataset and finds the weighted sum of squares about wmean
pub fn w_tss_mean<T: F64>(w: &[T], data: &[T], wmean: f64) -> f64 {
    let mut res = 0.0;

    // find the sum of the squares
    for (i, val) in data.iter().enumerate() {
        let wi = w[i].f64();
        if wi > 0.0 {
            let delta = val.f64() - wmean;
            res += wi * delta * delta;
        }
    }
    res
}

pub fn w_tss<T: F64>(w: &[T], data: &[T]) -> f64 {
    let wmean = w_mean(w, data);
    w_tss_mean(w, data, wmean)
}