ostree_ext/
statistics.rs

//! This module implements a few basic descriptive statistics: the mean, the standard deviation, and the median absolute deviation.
2
/// Returns the arithmetic mean of `data`, or `None` when `data` is empty.
///
/// The samples are accumulated in a `u128`, so the sum cannot overflow even
/// when the values are close to `u64::MAX`. The only rounding happens when the
/// exact integer total is converted to `f64`.
pub(crate) fn mean(data: &[u64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    // Widen every sample before adding: a plain `u64` sum would panic in debug
    // builds and wrap silently in release builds for large inputs.
    let total: u128 = data.iter().map(|&value| u128::from(value)).sum();
    Some(total as f64 / data.len() as f64)
}
10
11pub(crate) fn std_deviation(data: &[u64]) -> Option<f64> {
12    match (mean(data), data.len()) {
13        (Some(data_mean), count) if count > 0 => {
14            let variance = data
15                .iter()
16                .map(|value| {
17                    let diff = data_mean - (*value as f64);
18                    diff * diff
19                })
20                .sum::<f64>()
21                / count as f64;
22            Some(variance.sqrt())
23        }
24        _ => None,
25    }
26}
27
/// Computes the median of `data` and its median absolute deviation (MAD).
///
/// `data` must already be sorted in ascending order: the median is read
/// straight from the middle position(s) and this function does not sort or
/// otherwise modify the slice (it is taken as `&mut` only to keep the existing
/// signature).
///
/// Returns `None` for an empty slice, otherwise `Some((median, mad))`, where
/// `mad` is the median of the absolute distances of every sample from the
/// median.
pub(crate) fn median_absolute_deviation(data: &mut [u64]) -> Option<(f64, f64)> {
    if data.is_empty() {
        return None;
    }

    let len = data.len();
    let mid = len / 2;
    let is_odd = len % 2 == 1;

    // Median of the pre-sorted samples. For an even count the two middle
    // values are widened to `u128` before being added, so the sum cannot
    // overflow `u64` when the values are large.
    let median_data = if is_odd {
        data[mid] as f64
    } else {
        0.5 * (u128::from(data[mid - 1]) + u128::from(data[mid])) as f64
    };

    // Absolute distance of every sample from the median.
    let mut absolute_deviations: Vec<f64> = data
        .iter()
        .map(|&size| (size as f64 - median_data).abs())
        .collect();

    // The deviations come from finite values, so none of them is NaN and the
    // comparison always succeeds.
    absolute_deviations.sort_by(|a, b| a.partial_cmp(b).expect("deviations are never NaN"));

    // Median of the sorted deviations.
    let mad = if is_odd {
        absolute_deviations[mid]
    } else {
        0.5 * (absolute_deviations[mid - 1] + absolute_deviations[mid])
    };

    Some((median_data, mad))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Values used for every single-element check below.
    const SINGLETONS: [u64; 4] = [0, 1, 5, 100];

    #[test]
    fn test_mean() {
        // No samples, no mean.
        assert_eq!(mean(&[]), None);

        // A lone sample is its own mean.
        for sample in SINGLETONS {
            assert_eq!(mean(&[sample]), Some(sample as f64));
        }

        let cases: [(&[u64], f64); 3] = [
            (&[0, 1], 0.5),
            (&[0, 5, 100], 35.0),
            (&[7, 4, 30, 14], 13.75),
        ];
        for (samples, expected) in cases {
            assert_eq!(mean(samples), Some(expected));
        }
    }

    #[test]
    fn test_std_deviation() {
        assert_eq!(std_deviation(&[]), None);

        // A lone sample has no spread.
        for sample in SINGLETONS {
            assert_eq!(std_deviation(&[sample]), Some(0.0));
        }

        let cases: [(&[u64], f64); 3] = [
            (&[1, 4], 1.5),
            (&[2, 2, 2, 2], 0.0),
            (
                &[1, 20, 300, 4000, 50000, 600000, 7000000, 80000000],
                26193874.56387471,
            ),
        ];
        for (samples, expected) in cases {
            assert_eq!(std_deviation(samples), Some(expected));
        }
    }

    #[test]
    fn test_median_absolute_deviation() {
        // The function assumes its input is sorted; it takes a mutable slice,
        // so each case hands over its own vector.
        let mad = |mut samples: Vec<u64>| median_absolute_deviation(&mut samples);

        assert_eq!(mad(vec![]), None);
        for sample in SINGLETONS {
            assert_eq!(mad(vec![sample]), Some((sample as f64, 0.0)));
        }

        assert_eq!(mad(vec![1, 4]), Some((2.5, 1.5)));
        assert_eq!(mad(vec![2, 2, 2, 2]), Some((2.0, 0.0)));
        assert_eq!(
            mad(vec![
                1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 9, 12, 52, 90
            ]),
            Some((6.0, 2.0))
        );

        // When more than half of the samples share one value the MAD is 0, so
        // any value that differs from the median is classified as an outlier.
        assert_eq!(mad(vec![0, 1, 1, 1, 1, 1, 1, 1, 0]), Some((1.0, 0.0)));
    }
}