diff options
Diffstat (limited to 'src/lzw.rs')
-rw-r--r-- | src/lzw.rs | 306 |
1 files changed, 152 insertions, 154 deletions
diff --git a/src/lzw.rs b/src/lzw.rs index 1970617..053c5c4 100644 --- a/src/lzw.rs +++ b/src/lzw.rs @@ -2,174 +2,172 @@ use std::collections::HashMap; pub struct LZW {} impl LZW { - //TODO: Cleanup and make not awful - pub fn encode(minimum_size: u8, indicies: &[u8]) -> Vec<u8> { - let mut dictionary: HashMap<Vec<u8>, u16> = HashMap::new(); - - let cc: u16 = 2u16.pow(minimum_size as u32); - let eoi: u16 = cc+1; - - let mut index: u16 = eoi+1; // Next code number - let mut codesize: u8 = minimum_size+1; // Current code size - - //println!("cc: {} - eoi: {}", cc, eoi); - - // Populate starting codes - for code in 0..cc { - dictionary.insert(vec![code as u8], code); - } - - let mut iter = indicies.iter(); - let mut out = BitStream::new(); - let mut buffer = vec![*iter.next().unwrap()]; //TODO: Remove unwrap - - //"Encoders should output a Clear code as the first code of each image data stream." - out.push_bits(codesize, cc); - - //println!("Before Loop\n\tBuffer: {:?}\n\tCodesize:{}\n\tIndex:{}", buffer, codesize, index); - - for next_code in iter { - buffer.push(*next_code); - - //println!("Buffer: {:?} - Codesize:{} - Index:{}\n\tDict: {:?}", buffer, codesize, index, dictionary); - - match dictionary.get(&buffer) { - Some(_code) => { - //println!("\tPresent!"); - continue; - }, - None => { - buffer.pop(); - match dictionary.get(&buffer) { - Some(code) => { - out.push_bits(codesize, *code); - //println!("\tOutputting {} with {} bits. Buffer is {:?}", *code, codesize, buffer); - - // Add new entry for buffer and increase the index - buffer.push(*next_code); - dictionary.insert(buffer, index); - index += 1; - - // Increase code size if we should - if index-1 == 2u16.pow(codesize as u32) { - codesize += 1; - } - - // Reset the buffer to only contain next_code - buffer = vec![*next_code]; - }, - None => panic!("No dictionary entry when there should be! Something is wrong!") - } - } - } - } - - if buffer.len() > 0 { - match dictionary.get(&buffer) { - None => panic!("No codes left but not in the dictionary!"), - Some(code) => out.push_bits(codesize, *code) - } - } - out.push_bits(codesize, eoi); - - out.vec() - } + pub fn encode(minimum_size: u8, indicies: &[u8]) -> Vec<u8> { + let mut dictionary: HashMap<Vec<u8>, u16> = HashMap::new(); + + let cc = 2u16.pow(minimum_size as u32); + let eoi = cc + 1; + + // Fill dictionary with self-descriptive values + for value in 0..cc { + dictionary.insert(vec![value as u8], value); + } + + let mut next_code = eoi + 1; + let mut code_size = minimum_size + 1; + + let mut iter = indicies.into_iter(); + let mut out = BitStream::new(); + let mut buffer = vec![*iter.next().unwrap()]; + + out.push_bits(code_size, cc); + + for &indicie in iter { + buffer.push(indicie); + + if !dictionary.contains_key(&buffer) { + buffer.pop(); + + if let Some(&code) = dictionary.get(&buffer) { + out.push_bits(code_size, code); + + // Put the code back and add the vec to the dict + buffer.push(indicie); + dictionary.insert(buffer.clone(), next_code); + next_code += 1; + + // If the next_code can't fit in the code_size, we have to increase it + if next_code - 1 == 2u16.pow(code_size as u32) { + code_size += 1; + } + + buffer.clear(); + buffer.push(indicie); + } else { + unreachable!() + } + } + } + + if buffer.len() > 0 { + match dictionary.get(&buffer) { + Some(&code) => out.push_bits(code_size, code), + None => { + panic!("Codes left in the buffer but the buffer is not a valid dictionary key!") + } + } + } + out.push_bits(code_size, eoi); + + out.vec() + } } #[cfg(test)] mod lzw_test { - use super::*; + use super::*; - #[test] - fn encode() { - let indicies = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]; - let output = vec![0x84, 0x1D, 0x81, 0x7A, 0x50]; + #[test] + fn encode() { + let indicies = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]; + let output = vec![0x84, 0x1D, 0x81, 0x7A, 0x50]; - let lzout = LZW::encode(2, &indicies); + let lzout = LZW::encode(2, &indicies); - assert_eq!(lzout, output); - } + assert_eq!(lzout, output); + } + + #[test] + fn against_weezl() { + let indicies = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0]; + let weezl = weezl::encode::Encoder::new(weezl::BitOrder::Lsb, 2) + .encode(&indicies) + .unwrap(); + let us = LZW::encode(2, &indicies); + + assert_eq!(weezl, us); + } } struct BitStream { - formed: Vec<u8>, - current: u8, - index: u8 + formed: Vec<u8>, + current: u8, + index: u8, } impl BitStream { - fn new() -> Self { - Self { - formed: vec![], - current: 0, - index: 0 - } - } - - fn push_bits(&mut self, count: u8, data: u16) { - let mut new_index = self.index + count; - let mut current32 = (self.current as u32) | ((data as u32) << self.index); - - loop { - if new_index >= 8 { - self.formed.push(current32 as u8); - current32 = current32 >> 8; - new_index -= 8; - } else { - self.current = current32 as u8; - self.index = new_index; - - break; - } - } - } - - fn vec(self) -> Vec<u8> { - let mut out = self.formed; - - if self.index != 0 { - out.push(self.current); - } - - out - } + fn new() -> Self { + Self { + formed: vec![], + current: 0, + index: 0, + } + } + + fn push_bits(&mut self, count: u8, data: u16) { + let mut new_index = self.index + count; + let mut current32 = (self.current as u32) | ((data as u32) << self.index); + + loop { + if new_index >= 8 { + self.formed.push(current32 as u8); + current32 = current32 >> 8; + new_index -= 8; + } else { + self.current = current32 as u8; + self.index = new_index; + + break; + } + } + } + + fn vec(self) -> Vec<u8> { + let mut out = self.formed; + + if self.index != 0 { + out.push(self.current); + } + + out + } } #[cfg(test)] mod bitstream_test { - use super::*; - - #[test] - fn short_push() { - let mut bs = BitStream::new(); - bs.push_bits(2, 3); - bs.push_bits(2, 3); - bs.push_bits(3, 1); - bs.push_bits(2, 3); - - let bsvec = bs.vec(); - - for byte in &bsvec { - print!("{:b} ", byte); - } - println!(""); - - assert_eq!(bsvec, vec![0b1001_1111, 0b0000_0001]); - } - - #[test] - fn long_push() { - let mut bs = BitStream::new(); - bs.push_bits(1, 1); - bs.push_bits(12, 2049); - - let bsvec = bs.vec(); - - for byte in &bsvec { - print!("{:b} ", byte); - } - println!(""); - - assert_eq!(bsvec, vec![0b0000_0011, 0b0001_0000]); - } -} \ No newline at end of file + use super::*; + + #[test] + fn short_push() { + let mut bs = BitStream::new(); + bs.push_bits(2, 3); + bs.push_bits(2, 3); + bs.push_bits(3, 1); + bs.push_bits(2, 3); + + let bsvec = bs.vec(); + + for byte in &bsvec { + print!("{:b} ", byte); + } + println!(""); + + assert_eq!(bsvec, vec![0b1001_1111, 0b0000_0001]); + } + + #[test] + fn long_push() { + let mut bs = BitStream::new(); + bs.push_bits(1, 1); + bs.push_bits(12, 2049); + + let bsvec = bs.vec(); + + for byte in &bsvec { + print!("{:b} ", byte); + } + println!(""); + + assert_eq!(bsvec, vec![0b0000_0011, 0b0001_0000]); + } +} |