ttpgen/
statistics.rs

1// External crates
2use plotters::prelude::*;
3use log::{info};
4
/// Namespace type for descriptive-statistics helpers over `i128` samples.
///
/// The struct carries no data; every function in its `impl` block is an
/// associated function that is called as `Statistics::name(...)`.
pub struct Statistics;
6
7impl Statistics{
8
9    /// Computes the arithmetic mean (average) of a vector of integer values.
10    ///
11    /// # Arguments
12    /// * `data` - A reference to a vector of 128-bit signed integers (`Vec<i128>`)
13    ///   representing the values for which the mean will be calculated.
14    ///
15    /// # Returns
16    /// A `f64` value representing the average of all elements in `data`.
17    ///
18    /// # Panics
19    /// This function will **panic** if `data` is empty, because division by zero
20    /// would occur. Ensure that the input vector contains at least one value.
21    ///
22    /// # Example
23    /// ```
24    /// let values = vec![10_i128, 20, 30, 40];
25    /// let avg = mean(&values);
26    /// ```
27    pub fn mean(data: &Vec<i128>) -> f64 {
28        let sum: i128 = data.iter().sum();
29        sum as f64 / data.len() as f64
30    }
31
32    /// Computes the median value of a vector of integers.
33    ///
34    /// # Arguments
35    /// * `data` - A reference to a vector of `i128` values.
36    ///
37    /// # Returns
38    /// A `f64` representing the median of the input data.
39    ///
40    /// # Panics
41    /// This function will **panic** if `data` is empty, because accessing elements
42    /// in an empty slice is invalid. Ensure that the vector contains at least one value.
43    ///
44    /// # Example
45    /// ```
46    /// let values = vec![5_i128, 1, 9, 3, 7];
47    /// let med = median(&values);
48    /// ```
49    ///
50    pub fn median(data: &Vec<i128>) -> f64 {
51        let mut sorted = data.clone();
52        sorted.sort();
53
54        let mid = sorted.len() / 2;
55        if sorted.len() % 2 == 0 {
56            (sorted[mid - 1] as f64 + sorted[mid] as f64) / 2.0
57        } else {
58            sorted[mid] as f64
59        }
60    }
61
62    /// Computes the variance of a vector of integer values.
63    ///
64    /// # Arguments
65    /// * `data` - A reference to a vector of `i128` values whose variance will be computed.
66    ///
67    /// # Returns
68    /// A `f64` representing the variance of the data.
69    ///
70    /// # Panics
71    /// This function will **panic** if `data` is empty, since variance is undefined
72    /// for an empty dataset. Ensure the input contains at least one value.
73    ///
74    /// # Example
75    /// ```
76    /// let values = vec![2_i128, 4, 4, 4, 5, 5, 7, 9];
77    /// let var = Statistics::variance(&values);
78    /// ```
79    ///
80    pub fn variance(data: &Vec<i128>) -> f64 {
81        let m = Statistics::mean(data);
82        data.iter()
83            .map(|value| {
84                let diff = *value as f64 - m;
85                diff * diff
86            })
87            .sum::<f64>() / data.len() as f64
88    }
89
90    /// Computes the standard deviation of a vector of integer values.
91    ///
92    /// # Arguments
93    /// * `data` - A reference to a vector of `i128` values whose standard deviation will be computed.
94    ///
95    /// # Returns
96    /// A `f64` representing the standard deviation.
97    ///
98    /// # Panics
99    /// This function will **panic** if `data` is empty, since standard deviation
100    /// cannot be computed without at least one value.
101    ///
102    /// # Example
103    /// ```
104    /// let values = vec![2_i128, 4, 4, 4, 5, 5, 7, 9];
105    /// let sd = Statistics::std_dev(&values);
106    /// ```
107    ///
108    pub fn std_dev(data: &Vec<i128>) -> f64 {
109        Statistics::variance(data).sqrt()
110    }
111
112    /// Returns the minimum and maximum values in a vector of integer values.
113    ///
114    /// # Arguments
115    /// * `data` - A reference to a vector of `i128` values.
116    ///
117    /// # Returns
118    /// A tuple `(min, max)`:
119    /// - `min` (`i128`): The smallest value in the vector.
120    /// - `max` (`i128`): The largest value in the vector.
121    ///
122    /// # Panics
123    /// This function will **panic** if the dataset is empty, because computing a
124    /// minimum and maximum requires at least one value.
125    ///
126    /// # Example
127    /// ```
128    /// let values = vec![12_i128, 5, 30, 7, 9];
129    /// let (min_val, max_val) = Statistics::min_max(&values);
130    /// ```
131    ///
132    pub fn min_max(data: &Vec<i128>) -> (i128, i128) {
133        (*data.iter().min().unwrap(), *data.iter().max().unwrap())
134    }
135
136    /// Computes the first, second (median), and third quartiles of a vector of integer values.
137    ///
138    /// # Arguments
139    /// * `data` - A reference to a vector of `i128` values.
140    ///
141    /// # Returns
142    /// A tuple `(q1, q2, q3)` of type `(f64, f64, f64)` representing the three quartiles.
143    ///
144    /// # Panics
145    /// This function will **panic** if `data` is empty.
146    ///
147    /// # Example
148    /// ```
149    /// let values = vec![7_i128, 15, 36, 39, 40, 41, 42, 43, 47, 49];
150    /// let (q1, q2, q3) = Statistics::quartiles(&values);
151    /// ```
152    pub fn quartiles(data: &Vec<i128>) -> (f64, f64, f64) {
153        let mut sorted = data.clone();
154        sorted.sort();
155        let n = sorted.len();
156
157        let q2 = Statistics::median(&sorted);
158        let q1 = Statistics::median(&sorted[..n/2].to_vec());
159        let q3 = Statistics::median(&sorted[(n+1)/2..].to_vec());
160
161        (q1, q2, q3)
162    }
163
164    /// Plots a histogram of the given distances and saves it as an image file.
165    ///
166    /// This function divides the range of distances into a fixed number of bins (20),
167    /// counts the number of distances falling into each bin, and creates a histogram
168    /// chart using the `plotters` crate. The Y-axis is scaled based on the maximum
169    /// count plus a margin of 5 (can be changed).
170    ///
171    /// # Arguments
172    /// * `distances` - A reference to a vector of `i128` distances.
173    /// * `filename` - A string slice representing the path where the histogram image
174    ///   will be saved.
175    ///
176    /// # Panics
177    /// This function will panic if:
178    /// - The distances vector is empty.
179    /// - Writing the image file fails.
180    ///
181    /// # Example
182    /// ```
183    /// let distances = vec![10, 20, 20, 30, 40, 40, 40, 50];
184    /// Statistics::plot_histogram(&distances, "output/histogram.png");
185    /// ```
186    pub fn plot_histogram(distances: &Vec<i128>, filename: &str) {
187        let min = *distances.iter().min().unwrap();
188        let max = *distances.iter().max().unwrap();
189
190        let root = BitMapBackend::new(filename, (1280, 720))
191            .into_drawing_area();
192        root.fill(&WHITE).unwrap();
193
194        let bins = 20;
195        let step = ((max - min) / bins).max(1);
196
197        let mut counts: Vec<i128> = Vec::new();
198
199        for b in 0..bins {
200            let start = min + b * step;
201            let end = start + step;
202
203            let count = distances.iter().filter(|&&v| v >= start && v < end).count() as i128;
204            counts.push(count);
205        }
206
207        let y_max = counts.iter().max().cloned().unwrap_or(0) + 5;
208
209        let mut chart = ChartBuilder::on(&root)
210            .caption("Distance Distribution", ("sans-serif", 40))
211            .margin(10)
212            .x_label_area_size(40)
213            .y_label_area_size(40)
214            .build_cartesian_2d(min..max, 0..y_max)
215            .unwrap();
216
217        chart.configure_mesh().draw().unwrap();
218
219        for (b, &count) in counts.iter().enumerate() {
220            let start = min + (b as i128) * step;
221            let end = start + step;
222
223            chart.draw_series(std::iter::once(Rectangle::new(
224                [(start, 0), (end, count)],
225                BLUE.mix(0.6).filled(),
226            ))).unwrap();
227        }
228    }
229
230    /// Computes and logs statistical summaries of a vector of distances.
231    ///
232    /// # Arguments
233    /// * `distances` - A reference to a vector of `i128` values representing distances.
234    ///
235    /// # Example
236    /// ```
237    /// let distances = vec![10, 20, 30, 40, 50];
238    /// Statistics::generate_statistics(&distances);
239    /// ```
240    pub fn generate_statistics(distances: &Vec<i128>) {
241
242        info!("Mean: {}", Statistics::mean(&distances));
243        info!("Median: {}", Statistics::median(&distances));
244        info!("Variance: {}", Statistics::variance(&distances));
245        info!("Std Dev: {}", Statistics::std_dev(&distances));
246        info!("Min-Max: {:?}", Statistics::min_max(&distances));
247        info!("Quartiles: {:?}", Statistics::quartiles(&distances));
248
249        Statistics::plot_histogram(&distances, "dist_histogram.png");
250    }
251
252}