diff options
author | gennyble <gen@nyble.dev> | 2025-01-28 22:02:35 -0600 |
---|---|---|
committer | gennyble <gen@nyble.dev> | 2025-01-28 22:02:35 -0600 |
commit | b3317c8758146bcd5c1c4828c41d6fca83de42f3 (patch) | |
tree | c472e3b5fd6aa1d9a94c4e1dbd6cd82de3b6a4fa | |
parent | 5368469772d7421bb3f7e7a3b932eb9cde4d666b (diff) | |
download | gifed-b3317c8758146bcd5c1c4828c41d6fca83de42f3.tar.gz gifed-b3317c8758146bcd5c1c4828c41d6fca83de42f3.zip |
LZW decoding functional!
-rw-r--r-- | gifed/src/lzw.rs | 178 |
1 files changed, 155 insertions, 23 deletions
diff --git a/gifed/src/lzw.rs b/gifed/src/lzw.rs index c248ebf..69a4db4 100644 --- a/gifed/src/lzw.rs +++ b/gifed/src/lzw.rs @@ -1,3 +1,4 @@ +use core::panic; use std::collections::HashMap; use bitvec::prelude::*; @@ -53,19 +54,6 @@ impl<'a> LZW<'a> { *self = Self::new(self.minimum_size) } - pub fn decode(&mut self, encoded: &[u8]) -> Vec<u8> { - let mut input = BitStream::new(); - for &byte in encoded { - input.push_bits(8, byte as u16); - } - - let mut out = BitStream::new(); - - todo!(); - - out.vec() - } - pub fn encode(&mut self, indices: &'a [u8]) -> Vec<u8> { let mut next_code = self.end_of_information_code + 1; let mut code_size = self.minimum_size + 1; @@ -128,11 +116,118 @@ impl<'a> LZW<'a> { } } +pub fn decode(minimum_size: u8, encoded: &[u8]) -> Vec<u8> { + let clear_code = 1u16 << minimum_size; + let end_of_information_code = clear_code + 1; + + let mut dictionary = vec![]; + + // Fill dictionary with self-descriptive values + for value in 0..clear_code { + dictionary.push(vec![value as u8]); + } + // Using empty vecs to fill in the spot for the clear and EoI + dictionary.push(vec![]); + dictionary.push(vec![]); + + // Setup state we need + let mut code_size = minimum_size + 1; + + let mut input = BitPopper::new(encoded); + let mut out = vec![]; + + let first_code = input.pop_bits(code_size); + if first_code != clear_code { + panic!("first code should be clear code!") + } + + // + 2 because we're prefilling the dict_buffer with the first code + let mut next_code = end_of_information_code + 2; + let mut dict_buffer = vec![input.pop_bits(code_size) as u8]; + out.push(dict_buffer[0]); + + let mut step = 2; + loop { + step += 1; + let code = input.pop_bits(code_size); + + if code == end_of_information_code { + break; + } else if code == clear_code { + dict_buffer = vec![input.pop_bits(code_size) as u8]; + out.push(dict_buffer[0]); + + dictionary.clear(); + + // Fill dictionary with self-descriptive values + for value in 0..clear_code { + dictionary.push(vec![value as u8]); + } + // Using empty vecs to fill in the spot for the clear and EoI + dictionary.push(vec![]); + dictionary.push(vec![]); + + next_code = end_of_information_code + 2; + code_size = minimum_size + 1; + continue; + } + + if code == next_code - 1 { + // We're trying to use what was last put into the dictionary, + // but it's buffered. Thankfully we can resolve that now + let mut buffer = dict_buffer.clone(); + buffer.push(dict_buffer[0]); + dictionary.push(buffer); + } + + if (code as usize) < dictionary.len() { + let indices = &dictionary[code as usize]; + + // Push the indices to the output stream + out.extend_from_slice(indices); + + // Notice the order here. We make the dict key we're adding here + // so we can drop the indices borrow and push to the dictionary. + // Do not put this after the `dict_buffer` assign. + let mut buffer = dict_buffer.clone(); + buffer.push(indices[0]); + + // Buffer the next dictionary entry + dict_buffer = indices.to_owned(); + + if code != next_code - 1 { + dictionary.push(buffer); + } + } else { + panic!( + "[step {step}] invalid code: {code} // next_code = {next_code} // dict_len = {}", + dictionary.len() + ) + } + + /*println!( + "step {step}: code {code} // code_size = {code_size} // next_code = {next_code} // out = {:?}", + &out + ); + println!( + "\tpopper: idx = {} // byte {:02X} // {:0>8b}", + input.idx, input.data[0], input.data[0] + );*/ + + next_code += 1; + if next_code > (1 << code_size) { + code_size += 1; + } + } + + out +} + #[cfg(test)] mod lzw_test { use super::*; - fn rand_against_weezl(length: usize, lzw_size: u8) { + fn encode_rand_against_weezl(length: usize, lzw_size: u8) { let range = rand::distributions::Uniform::from(0..=((1 << lzw_size as usize) - 1) as u8); let indices = rand::Rng::sample_iter(rand::thread_rng(), &range) .take(length) @@ -154,24 +249,24 @@ mod lzw_test { } #[test] - fn fortyk_against_weezl() { - rand_against_weezl(40_000, 2); + fn encode_fortyk_against_weezl() { + encode_rand_against_weezl(40_000, 2); } #[test] - fn thirtyeightk_against_weezl() { - rand_against_weezl(38_000, 2); + fn encode_thirtyeightk_against_weezl() { + encode_rand_against_weezl(38_000, 2); } #[test] - fn thirtyeightk_against_weezl_full_codesize() { - rand_against_weezl(38_000, 8); + fn encode_thirtyeightk_against_weezl_full_codesize() { + encode_rand_against_weezl(38_000, 8); } #[test] - fn twentyk_against_weezl_repeated() { + fn encode_twentyk_against_weezl_repeated() { for _ in 0..100 { - rand_against_weezl(20_000, 2) + encode_rand_against_weezl(20_000, 2) } } @@ -184,6 +279,41 @@ mod lzw_test { assert_eq!(lzout, output); } + + fn decode_rand_against_weezl(length: usize, lzw_size: u8) { + let range = rand::distributions::Uniform::from(0..=((1 << lzw_size as usize) - 1) as u8); + let indices = rand::Rng::sample_iter(rand::thread_rng(), &range) + .take(length) + .collect::<Vec<_>>(); + + let weezl = weezl::encode::Encoder::new(weezl::BitOrder::Lsb, lzw_size) + .encode(&indices) + .unwrap(); + + let us_decode = decode(lzw_size, &weezl); + + assert_eq!(indices, us_decode); + } + + #[test] + fn kilobyte_decode() { + decode_rand_against_weezl(1024, 12); + } + + #[test] + fn tenk_decode() { + decode_rand_against_weezl(10 * 1024, 12); + } + + #[test] + fn test_decode() { + let indices = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]; + let output = vec![0x84, 0x1D, 0x81, 0x7A, 0x50]; + + let result = decode(2, &output); + + assert_eq!(result, indices) + } } struct BitStream { @@ -220,6 +350,7 @@ impl BitStream { } const MASKS: &[u8] = &[ + 0b0000_0000, 0b0000_0001, 0b0000_0011, 0b0000_0111, @@ -360,7 +491,7 @@ mod bitstream_test { #[test] fn bitpopper_pop_bits() { - let mut bs = BitPopper::new(&[0x84, 0x1D]); + let mut bs = BitPopper::new(&[0x84, 0x1D, 0x81, 0x7A, 0x50]); assert_eq!(bs.pop_bits(3), 0b100); assert_eq!(bs.pop_bits(3), 0b000); @@ -370,5 +501,6 @@ mod bitstream_test { assert_eq!(bs.pop_bits(3), 0b110); assert_eq!(bs.pop_bits(4), 0b0001); + assert_eq!(bs.pop_bits(4), 0b0001); } } |