ttpgen/statistics.rs
1// External crates
2use plotters::prelude::*;
3use log::{info};
4
/// Namespace type for descriptive-statistics helpers over `i128` samples.
///
/// The type carries no data; every operation is exposed as an associated
/// function (for example `Statistics::mean`).
#[derive(Debug, Clone, Copy, Default)]
pub struct Statistics;
6
7impl Statistics{
8
/// Returns the arithmetic mean of `data` as an `f64`.
///
/// The elements are first summed as `i128`; the total is then converted to
/// `f64` and divided by the number of elements.
///
/// # Arguments
/// * `data` - The values to average.
///
/// # Returns
/// The average of all elements in `data`. For an empty vector the division
/// is the floating-point `0.0 / 0.0`, so the result is `NaN` and no panic
/// occurs.
///
/// # Example
/// ```ignore
/// let values = vec![10_i128, 20, 30, 40];
/// assert_eq!(Statistics::mean(&values), 25.0);
/// ```
pub fn mean(data: &Vec<i128>) -> f64 {
    let count = data.len() as f64;
    let total = data.iter().copied().sum::<i128>() as f64;
    total / count
}
31
/// Computes the median of a vector of integers.
///
/// The input is copied and sorted; `data` itself is left untouched. For an
/// odd number of elements the middle element is returned; for an even number
/// the two middle elements are averaged.
///
/// # Arguments
/// * `data` - A reference to a vector of `i128` values.
///
/// # Returns
/// An `f64` holding the median of the input data.
///
/// # Panics
/// Panics if `data` is empty, because an empty dataset has no median.
///
/// # Example
/// ```ignore
/// let values = vec![5_i128, 1, 9, 3, 7];
/// assert_eq!(Statistics::median(&values), 5.0);
/// ```
pub fn median(data: &Vec<i128>) -> f64 {
    // Fail on an explicit precondition rather than on an incidental
    // `mid - 1` underflow / out-of-bounds index further down.
    assert!(!data.is_empty(), "median requires at least one value");

    let mut sorted = data.clone();
    // Equal integers are indistinguishable, so an unstable sort yields the
    // same ordering without the extra allocation of a stable sort.
    sorted.sort_unstable();

    let mid = sorted.len() / 2;
    if sorted.len() % 2 == 0 {
        // Convert before adding so the two middle values cannot overflow `i128`.
        (sorted[mid - 1] as f64 + sorted[mid] as f64) / 2.0
    } else {
        sorted[mid] as f64
    }
}
61
62 /// Computes the variance of a vector of integer values.
63 ///
64 /// # Arguments
65 /// * `data` - A reference to a vector of `i128` values whose variance will be computed.
66 ///
67 /// # Returns
68 /// A `f64` representing the variance of the data.
69 ///
70 /// # Panics
71 /// This function will **panic** if `data` is empty, since variance is undefined
72 /// for an empty dataset. Ensure the input contains at least one value.
73 ///
74 /// # Example
75 /// ```
76 /// let values = vec![2_i128, 4, 4, 4, 5, 5, 7, 9];
77 /// let var = Statistics::variance(&values);
78 /// ```
79 ///
80 pub fn variance(data: &Vec<i128>) -> f64 {
81 let m = Statistics::mean(data);
82 data.iter()
83 .map(|value| {
84 let diff = *value as f64 - m;
85 diff * diff
86 })
87 .sum::<f64>() / data.len() as f64
88 }
89
90 /// Computes the standard deviation of a vector of integer values.
91 ///
92 /// # Arguments
93 /// * `data` - A reference to a vector of `i128` values whose standard deviation will be computed.
94 ///
95 /// # Returns
96 /// A `f64` representing the standard deviation.
97 ///
98 /// # Panics
99 /// This function will **panic** if `data` is empty, since standard deviation
100 /// cannot be computed without at least one value.
101 ///
102 /// # Example
103 /// ```
104 /// let values = vec![2_i128, 4, 4, 4, 5, 5, 7, 9];
105 /// let sd = Statistics::std_dev(&values);
106 /// ```
107 ///
108 pub fn std_dev(data: &Vec<i128>) -> f64 {
109 Statistics::variance(data).sqrt()
110 }
111
/// Finds the smallest and the largest value in `data`.
///
/// # Arguments
/// * `data` - A reference to a vector of `i128` values.
///
/// # Returns
/// A tuple `(min, max)`:
/// - `min` (`i128`): the smallest value in the vector.
/// - `max` (`i128`): the largest value in the vector.
///
/// # Panics
/// Panics if `data` is empty, because neither a minimum nor a maximum exists
/// and the resulting `None` is unwrapped.
///
/// # Example
/// ```ignore
/// let values = vec![12_i128, 5, 30, 7, 9];
/// assert_eq!(Statistics::min_max(&values), (5, 30));
/// ```
pub fn min_max(data: &Vec<i128>) -> (i128, i128) {
    let smallest = data.iter().copied().min().unwrap();
    let largest = data.iter().copied().max().unwrap();
    (smallest, largest)
}
135
/// Computes the first, second (median), and third quartiles of a vector of integer values.
///
/// The data is copied and sorted once. `q2` is the median of the whole
/// dataset; `q1` and `q3` are the medians of the lower and upper halves. When
/// the number of elements is odd, the middle element belongs to neither half.
///
/// A dataset with a single element has empty halves; in that case all three
/// quartiles are that single value.
///
/// # Arguments
/// * `data` - A reference to a vector of `i128` values.
///
/// # Returns
/// A tuple `(q1, q2, q3)` of type `(f64, f64, f64)` representing the three quartiles.
///
/// # Panics
/// Panics if `data` is empty.
///
/// # Example
/// ```ignore
/// let values = vec![7_i128, 15, 36, 39, 40, 41, 42, 43, 47, 49];
/// assert_eq!(Statistics::quartiles(&values), (36.0, 40.5, 43.0));
/// ```
pub fn quartiles(data: &Vec<i128>) -> (f64, f64, f64) {
    // Median of a slice that is already sorted and non-empty. Working on
    // slices avoids re-cloning and re-sorting each half.
    fn median_of_sorted(sorted: &[i128]) -> f64 {
        let mid = sorted.len() / 2;
        if sorted.len() % 2 == 0 {
            (sorted[mid - 1] as f64 + sorted[mid] as f64) / 2.0
        } else {
            sorted[mid] as f64
        }
    }

    assert!(!data.is_empty(), "quartiles requires at least one value");

    let mut sorted = data.clone();
    sorted.sort_unstable();
    let n = sorted.len();

    let q2 = median_of_sorted(&sorted);
    if n == 1 {
        // Both halves are empty; the only sensible quartiles are the value itself.
        return (q2, q2, q2);
    }

    // Lower half: first n/2 elements. Upper half: last n/2 elements
    // (the middle element is skipped when n is odd).
    let q1 = median_of_sorted(&sorted[..n / 2]);
    let q3 = median_of_sorted(&sorted[(n + 1) / 2..]);

    (q1, q2, q3)
}
163
164 /// Plots a histogram of the given distances and saves it as an image file.
165 ///
166 /// This function divides the range of distances into a fixed number of bins (20),
167 /// counts the number of distances falling into each bin, and creates a histogram
168 /// chart using the `plotters` crate. The Y-axis is scaled based on the maximum
169 /// count plus a margin of 5 (can be changed).
170 ///
171 /// # Arguments
172 /// * `distances` - A reference to a vector of `i128` distances.
173 /// * `filename` - A string slice representing the path where the histogram image
174 /// will be saved.
175 ///
176 /// # Panics
177 /// This function will panic if:
178 /// - The distances vector is empty.
179 /// - Writing the image file fails.
180 ///
181 /// # Example
182 /// ```
183 /// let distances = vec![10, 20, 20, 30, 40, 40, 40, 50];
184 /// Statistics::plot_histogram(&distances, "output/histogram.png");
185 /// ```
186 pub fn plot_histogram(distances: &Vec<i128>, filename: &str) {
187 let min = *distances.iter().min().unwrap();
188 let max = *distances.iter().max().unwrap();
189
190 let root = BitMapBackend::new(filename, (1280, 720))
191 .into_drawing_area();
192 root.fill(&WHITE).unwrap();
193
194 let bins = 20;
195 let step = ((max - min) / bins).max(1);
196
197 let mut counts: Vec<i128> = Vec::new();
198
199 for b in 0..bins {
200 let start = min + b * step;
201 let end = start + step;
202
203 let count = distances.iter().filter(|&&v| v >= start && v < end).count() as i128;
204 counts.push(count);
205 }
206
207 let y_max = counts.iter().max().cloned().unwrap_or(0) + 5;
208
209 let mut chart = ChartBuilder::on(&root)
210 .caption("Distance Distribution", ("sans-serif", 40))
211 .margin(10)
212 .x_label_area_size(40)
213 .y_label_area_size(40)
214 .build_cartesian_2d(min..max, 0..y_max)
215 .unwrap();
216
217 chart.configure_mesh().draw().unwrap();
218
219 for (b, &count) in counts.iter().enumerate() {
220 let start = min + (b as i128) * step;
221 let end = start + step;
222
223 chart.draw_series(std::iter::once(Rectangle::new(
224 [(start, 0), (end, count)],
225 BLUE.mix(0.6).filled(),
226 ))).unwrap();
227 }
228 }
229
230 /// Computes and logs statistical summaries of a vector of distances.
231 ///
232 /// # Arguments
233 /// * `distances` - A reference to a vector of `i128` values representing distances.
234 ///
235 /// # Example
236 /// ```
237 /// let distances = vec![10, 20, 30, 40, 50];
238 /// Statistics::generate_statistics(&distances);
239 /// ```
240 pub fn generate_statistics(distances: &Vec<i128>) {
241
242 info!("Mean: {}", Statistics::mean(&distances));
243 info!("Median: {}", Statistics::median(&distances));
244 info!("Variance: {}", Statistics::variance(&distances));
245 info!("Std Dev: {}", Statistics::std_dev(&distances));
246 info!("Min-Max: {:?}", Statistics::min_max(&distances));
247 info!("Quartiles: {:?}", Statistics::quartiles(&distances));
248
249 Statistics::plot_histogram(&distances, "dist_histogram.png");
250 }
251
252}