about summary refs log tree commit diff
diff options
context:
space:
mode:
authorgennyble <gen@nyble.dev>2025-01-28 22:02:35 -0600
committergennyble <gen@nyble.dev>2025-01-28 22:02:35 -0600
commitb3317c8758146bcd5c1c4828c41d6fca83de42f3 (patch)
treec472e3b5fd6aa1d9a94c4e1dbd6cd82de3b6a4fa
parent5368469772d7421bb3f7e7a3b932eb9cde4d666b (diff)
downloadgifed-b3317c8758146bcd5c1c4828c41d6fca83de42f3.tar.gz
gifed-b3317c8758146bcd5c1c4828c41d6fca83de42f3.zip
LZW decoding functional!
-rw-r--r--gifed/src/lzw.rs178
1 files changed, 155 insertions, 23 deletions
diff --git a/gifed/src/lzw.rs b/gifed/src/lzw.rs
index c248ebf..69a4db4 100644
--- a/gifed/src/lzw.rs
+++ b/gifed/src/lzw.rs
@@ -1,3 +1,4 @@
+use core::panic;
 use std::collections::HashMap;
 
 use bitvec::prelude::*;
@@ -53,19 +54,6 @@ impl<'a> LZW<'a> {
 		*self = Self::new(self.minimum_size)
 	}
 
-	pub fn decode(&mut self, encoded: &[u8]) -> Vec<u8> {
-		let mut input = BitStream::new();
-		for &byte in encoded {
-			input.push_bits(8, byte as u16);
-		}
-
-		let mut out = BitStream::new();
-
-		todo!();
-
-		out.vec()
-	}
-
 	pub fn encode(&mut self, indices: &'a [u8]) -> Vec<u8> {
 		let mut next_code = self.end_of_information_code + 1;
 		let mut code_size = self.minimum_size + 1;
@@ -128,11 +116,118 @@ impl<'a> LZW<'a> {
 	}
 }
 
+pub fn decode(minimum_size: u8, encoded: &[u8]) -> Vec<u8> {
+	let clear_code = 1u16 << minimum_size;
+	let end_of_information_code = clear_code + 1;
+
+	let mut dictionary = vec![];
+
+	// Fill dictionary with self-descriptive values
+	for value in 0..clear_code {
+		dictionary.push(vec![value as u8]);
+	}
+	// Using empty vecs to fill in the spot for the clear and EoI
+	dictionary.push(vec![]);
+	dictionary.push(vec![]);
+
+	// Setup state we need
+	let mut code_size = minimum_size + 1;
+
+	let mut input = BitPopper::new(encoded);
+	let mut out = vec![];
+
+	let first_code = input.pop_bits(code_size);
+	if first_code != clear_code {
+		panic!("first code should be clear code!")
+	}
+
+	// + 2 because we're prefilling the dict_buffer with the first code
+	let mut next_code = end_of_information_code + 2;
+	let mut dict_buffer = vec![input.pop_bits(code_size) as u8];
+	out.push(dict_buffer[0]);
+
+	let mut step = 2;
+	loop {
+		step += 1;
+		let code = input.pop_bits(code_size);
+
+		if code == end_of_information_code {
+			break;
+		} else if code == clear_code {
+			dict_buffer = vec![input.pop_bits(code_size) as u8];
+			out.push(dict_buffer[0]);
+
+			dictionary.clear();
+
+			// Fill dictionary with self-descriptive values
+			for value in 0..clear_code {
+				dictionary.push(vec![value as u8]);
+			}
+			// Using empty vecs to fill in the spot for the clear and EoI
+			dictionary.push(vec![]);
+			dictionary.push(vec![]);
+
+			next_code = end_of_information_code + 2;
+			code_size = minimum_size + 1;
+			continue;
+		}
+
+		if code == next_code - 1 {
+			// We're trying to use what was last put into the dictionary,
+			// but it's buffered. Thankfully we can resolve that now
+			let mut buffer = dict_buffer.clone();
+			buffer.push(dict_buffer[0]);
+			dictionary.push(buffer);
+		}
+
+		if (code as usize) < dictionary.len() {
+			let indices = &dictionary[code as usize];
+
+			// Push the indices to the output stream
+			out.extend_from_slice(indices);
+
+			// Notice the order here. We make the dict key we're adding here
+			// so we can drop the indices borrow and push to the dictionary.
+			// Do not put this after the `dict_buffer` assign.
+			let mut buffer = dict_buffer.clone();
+			buffer.push(indices[0]);
+
+			// Buffer the next dictionary entry
+			dict_buffer = indices.to_owned();
+
+			if code != next_code - 1 {
+				dictionary.push(buffer);
+			}
+		} else {
+			panic!(
+				"[step {step}] invalid code: {code} // next_code = {next_code} // dict_len = {}",
+				dictionary.len()
+			)
+		}
+
+		/*println!(
+			"step {step}: code {code} // code_size = {code_size} // next_code = {next_code} // out = {:?}",
+			&out
+		);
+		println!(
+			"\tpopper: idx = {} // byte {:02X} // {:0>8b}",
+			input.idx, input.data[0], input.data[0]
+		);*/
+
+		next_code += 1;
+		if next_code > (1 << code_size) {
+			code_size += 1;
+		}
+	}
+
+	out
+}
+
 #[cfg(test)]
 mod lzw_test {
 	use super::*;
 
-	fn rand_against_weezl(length: usize, lzw_size: u8) {
+	fn encode_rand_against_weezl(length: usize, lzw_size: u8) {
 		let range = rand::distributions::Uniform::from(0..=((1 << lzw_size as usize) - 1) as u8);
 		let indices = rand::Rng::sample_iter(rand::thread_rng(), &range)
 			.take(length)
@@ -154,24 +249,24 @@ mod lzw_test {
 	}
 
 	#[test]
-	fn fortyk_against_weezl() {
-		rand_against_weezl(40_000, 2);
+	fn encode_fortyk_against_weezl() {
+		encode_rand_against_weezl(40_000, 2);
 	}
 
 	#[test]
-	fn thirtyeightk_against_weezl() {
-		rand_against_weezl(38_000, 2);
+	fn encode_thirtyeightk_against_weezl() {
+		encode_rand_against_weezl(38_000, 2);
 	}
 
 	#[test]
-	fn thirtyeightk_against_weezl_full_codesize() {
-		rand_against_weezl(38_000, 8);
+	fn encode_thirtyeightk_against_weezl_full_codesize() {
+		encode_rand_against_weezl(38_000, 8);
 	}
 
 	#[test]
-	fn twentyk_against_weezl_repeated() {
+	fn encode_twentyk_against_weezl_repeated() {
 		for _ in 0..100 {
-			rand_against_weezl(20_000, 2)
+			encode_rand_against_weezl(20_000, 2)
 		}
 	}
 
@@ -184,6 +279,41 @@ mod lzw_test {
 
 		assert_eq!(lzout, output);
 	}
+
+	fn decode_rand_against_weezl(length: usize, lzw_size: u8) {
+		let range = rand::distributions::Uniform::from(0..=((1 << lzw_size as usize) - 1) as u8);
+		let indices = rand::Rng::sample_iter(rand::thread_rng(), &range)
+			.take(length)
+			.collect::<Vec<_>>();
+
+		let weezl = weezl::encode::Encoder::new(weezl::BitOrder::Lsb, lzw_size)
+			.encode(&indices)
+			.unwrap();
+
+		let us_decode = decode(lzw_size, &weezl);
+
+		assert_eq!(indices, us_decode);
+	}
+
+	#[test]
+	fn kilobyte_decode() {
+		decode_rand_against_weezl(1024, 12);
+	}
+
+	#[test]
+	fn tenk_decode() {
+		decode_rand_against_weezl(10 * 1024, 12);
+	}
+
+	#[test]
+	fn test_decode() {
+		let indices = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0];
+		let output = vec![0x84, 0x1D, 0x81, 0x7A, 0x50];
+
+		let result = decode(2, &output);
+
+		assert_eq!(result, indices)
+	}
 }
 
 struct BitStream {
@@ -220,6 +350,7 @@ impl BitStream {
 }
 
 const MASKS: &[u8] = &[
+	0b0000_0000,
 	0b0000_0001,
 	0b0000_0011,
 	0b0000_0111,
@@ -360,7 +491,7 @@ mod bitstream_test {
 
 	#[test]
 	fn bitpopper_pop_bits() {
-		let mut bs = BitPopper::new(&[0x84, 0x1D]);
+		let mut bs = BitPopper::new(&[0x84, 0x1D, 0x81, 0x7A, 0x50]);
 
 		assert_eq!(bs.pop_bits(3), 0b100);
 		assert_eq!(bs.pop_bits(3), 0b000);
@@ -370,5 +501,6 @@ mod bitstream_test {
 
 		assert_eq!(bs.pop_bits(3), 0b110);
 		assert_eq!(bs.pop_bits(4), 0b0001);
+		assert_eq!(bs.pop_bits(4), 0b0001);
 	}
 }