diff options
Diffstat (limited to 'src/nih_kmeans.rs')
-rw-r--r-- | src/nih_kmeans.rs | 59 |
1 files changed, 54 insertions, 5 deletions
diff --git a/src/nih_kmeans.rs b/src/nih_kmeans.rs index 98a9965..a34d528 100644 --- a/src/nih_kmeans.rs +++ b/src/nih_kmeans.rs @@ -1,3 +1,6 @@ +use std::collections::HashMap; + +#[cfg(rand)] use rand::{prelude::*, seq::index::sample}; use rgb::{RGB, RGB8}; @@ -5,14 +8,40 @@ pub struct KMeans { samples: Vec<RGB8>, } +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +struct HashableRGBF { + inner: (u32, u32, u32), +} + +impl From<RGB<f32>> for HashableRGBF { + fn from(value: RGB<f32>) -> Self { + Self { + inner: (value.r.to_bits(), value.g.to_bits(), value.b.to_bits()), + } + } +} + impl KMeans { pub fn new(samples: Vec<RGB8>) -> Self { Self { samples } } pub fn get_k_colors(&self, k: usize, max_iter: usize) -> Vec<RGB8> { let mut centroids = self.get_centroid_seeds_simple(k); + for _ in 0..max_iter { - todo!() + let mut clusters: HashMap<HashableRGBF, Vec<RGB8>> = HashMap::new(); + + for &sample in &self.samples { + let closest_centroid = Self::closest_centroid(¢roids, sample.into()); + clusters + .entry(closest_centroid.into()) + .or_default() + .push(sample); + } + centroids = clusters + .into_values() + .map(|members| vector_avg(&members)) + .collect() } centroids .into_iter() @@ -20,15 +49,19 @@ impl KMeans { .collect() } - /// Uses k-means++ algorithm (https://www.mathworks.com/help/stats/kmeans.html#bueq7aj-5) + /// Picks a point at random (if feature rand is enabled) for the first centroid, then iteratively adds the point furthest away from any centroid + /// A more complex solution is the probabilistic k-means++ algorithm (https://www.mathworks.com/help/stats/kmeans.html#bueq7aj-5) fn get_centroid_seeds_simple(&self, k: usize) -> Vec<RGB<f32>> { if k >= self.samples.len() { return self.samples.iter().map(|&v| v.into()).collect(); } - let mut rng = thread_rng(); - let mut centroids: Vec<RGB<f32>> = - vec![self.samples[rng.gen_range(0..self.samples.len())].into()]; + #[cfg(rand)] + let index = thread_rng().gen_range(0..self.samples.len()); + #[cfg(not(rand))] + let index = 0; //lol + + let mut centroids: Vec<RGB<f32>> = vec![self.samples[index].into()]; while centroids.len() < k { let next = *self .samples @@ -58,6 +91,7 @@ impl KMeans { .unwrap() } + #[cfg(rand)] fn get_centroid_seeds_random(&self, k: usize) -> Vec<RGB<f32>> { if k >= self.samples.len() { return self.samples.iter().map(|&v| v.into()).collect(); @@ -78,3 +112,18 @@ fn vector_diff_2_norm(v1: RGB<f32>, v2: RGB<f32>) -> f32 { let diff = vector_diff(v1, v2); (diff.r.powi(2) + diff.g.powi(2) + diff.b.powi(2)).sqrt() } + +fn vector_sum(acc: RGB<f32>, elem: RGB<f32>) -> RGB<f32> { + RGB::new(acc.r + elem.r, acc.g + elem.g, acc.b + elem.b) +} + +fn vector_avg(vs: &[RGB8]) -> RGB<f32> { + let summed = vs.iter().fold(RGB::new(0.0, 0.0, 0.0), |acc, elem| { + vector_sum(acc, (*elem).into()) + }); + RGB::new( + summed.r / vs.len() as f32, + summed.g / vs.len() as f32, + summed.b / vs.len() as f32, + ) +} |