about summary refs log tree commit diff
path: root/src/lzw.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lzw.rs')
-rw-r--r--src/lzw.rs306
1 files changed, 152 insertions, 154 deletions
diff --git a/src/lzw.rs b/src/lzw.rs
index 1970617..053c5c4 100644
--- a/src/lzw.rs
+++ b/src/lzw.rs
@@ -2,174 +2,172 @@ use std::collections::HashMap;
 
 pub struct LZW {}
 impl LZW {
-	//TODO: Cleanup and make not awful
-	pub fn encode(minimum_size: u8, indicies: &[u8]) -> Vec<u8> {
-		let mut dictionary: HashMap<Vec<u8>, u16> = HashMap::new();
-
-		let cc: u16 = 2u16.pow(minimum_size as u32);
-		let eoi: u16 = cc+1;
-
-		let mut index: u16 = eoi+1; // Next code number
-		let mut codesize: u8 = minimum_size+1; // Current code size
-
-		//println!("cc: {} - eoi: {}", cc, eoi);
-
-		// Populate starting codes
-		for code in 0..cc {
-			dictionary.insert(vec![code as u8], code);
-		}
-
-		let mut iter = indicies.iter();
-		let mut out = BitStream::new();
-		let mut buffer = vec![*iter.next().unwrap()]; //TODO: Remove unwrap
-
-		//"Encoders should output a Clear code as the first code of each image data stream."
-		out.push_bits(codesize, cc);
-
-		//println!("Before Loop\n\tBuffer: {:?}\n\tCodesize:{}\n\tIndex:{}", buffer, codesize, index);
-
-		for next_code in iter {
-			buffer.push(*next_code);
-
-			//println!("Buffer: {:?} - Codesize:{} - Index:{}\n\tDict: {:?}", buffer, codesize, index, dictionary);
-
-			match dictionary.get(&buffer) {
-				Some(_code) => {
-					//println!("\tPresent!");
-					continue;
-				},
-				None => {
-					buffer.pop();
-					match dictionary.get(&buffer) {
-						Some(code) => {
-							out.push_bits(codesize, *code);
-							//println!("\tOutputting {} with {} bits. Buffer is {:?}", *code, codesize, buffer);
-
-							// Add new entry for buffer and increase the index
-							buffer.push(*next_code);
-							dictionary.insert(buffer, index);
-							index += 1;
-
-							// Increase code size if we should
-							if index-1 == 2u16.pow(codesize as u32) {
-								codesize += 1;
-							}
-
-							// Reset the buffer to only contain next_code
-							buffer = vec![*next_code];
-						},
-						None => panic!("No dictionary entry when there should be! Something is wrong!")
-					}
-				}
-			}
-		}
-
-		if buffer.len() > 0 {
-			match dictionary.get(&buffer) {
-				None => panic!("No codes left but not in the dictionary!"),
-				Some(code) => out.push_bits(codesize, *code)
-			}
-		}
-		out.push_bits(codesize, eoi);
-
-		out.vec()
-	}
+    pub fn encode(minimum_size: u8, indicies: &[u8]) -> Vec<u8> {
+        let mut dictionary: HashMap<Vec<u8>, u16> = HashMap::new();
+
+        let cc = 2u16.pow(minimum_size as u32);
+        let eoi = cc + 1;
+
+        // Fill dictionary with self-descriptive values
+        for value in 0..cc {
+            dictionary.insert(vec![value as u8], value);
+        }
+
+        let mut next_code = eoi + 1;
+        let mut code_size = minimum_size + 1;
+
+        let mut iter = indicies.into_iter();
+        let mut out = BitStream::new();
+        let mut buffer = vec![*iter.next().unwrap()];
+
+        out.push_bits(code_size, cc);
+
+        for &indicie in iter {
+            buffer.push(indicie);
+
+            if !dictionary.contains_key(&buffer) {
+                buffer.pop();
+
+                if let Some(&code) = dictionary.get(&buffer) {
+                    out.push_bits(code_size, code);
+
+                    // Put the code back and add the vec to the dict
+                    buffer.push(indicie);
+                    dictionary.insert(buffer.clone(), next_code);
+                    next_code += 1;
+
+                    // If the next_code can't fit in the code_size, we have to increase it
+                    if next_code - 1 == 2u16.pow(code_size as u32) {
+                        code_size += 1;
+                    }
+
+                    buffer.clear();
+                    buffer.push(indicie);
+                } else {
+                    unreachable!()
+                }
+            }
+        }
+
+        if buffer.len() > 0 {
+            match dictionary.get(&buffer) {
+                Some(&code) => out.push_bits(code_size, code),
+                None => {
+                    panic!("Codes left in the buffer but the buffer is not a valid dictionary key!")
+                }
+            }
+        }
+        out.push_bits(code_size, eoi);
+
+        out.vec()
+    }
 }
 
 #[cfg(test)]
 mod lzw_test {
-	use super::*;
+    use super::*;
 
-	#[test]
-	fn encode() {
-		let indicies = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0];
-		let output = vec![0x84, 0x1D, 0x81, 0x7A, 0x50];
+    #[test]
+    fn encode() {
+        let indicies = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0];
+        let output = vec![0x84, 0x1D, 0x81, 0x7A, 0x50];
 
-		let lzout = LZW::encode(2, &indicies);
+        let lzout = LZW::encode(2, &indicies);
 
-		assert_eq!(lzout, output);
-	}
+        assert_eq!(lzout, output);
+    }
+
+    #[test]
+    fn against_weezl() {
+        let indicies = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0];
+        let weezl = weezl::encode::Encoder::new(weezl::BitOrder::Lsb, 2)
+            .encode(&indicies)
+            .unwrap();
+        let us = LZW::encode(2, &indicies);
+
+        assert_eq!(weezl, us);
+    }
 }
 
 struct BitStream {
-	formed: Vec<u8>,
-	current: u8,
-	index: u8
+    formed: Vec<u8>,
+    current: u8,
+    index: u8,
 }
 
 impl BitStream {
-	fn new() -> Self {
-		Self {
-			formed: vec![],
-			current: 0,
-			index: 0
-		}
-	}
-
-	fn push_bits(&mut self, count: u8, data: u16) {
-		let mut new_index = self.index + count;
-		let mut current32 = (self.current as u32) | ((data as u32) << self.index);
-
-		loop {
-			if new_index >= 8 {
-				self.formed.push(current32 as u8);
-				current32 = current32 >> 8;
-				new_index -= 8;
-			} else {
-				self.current = current32 as u8;
-				self.index = new_index;
-
-				break;
-			}
-		}
-	}
-
-	fn vec(self) -> Vec<u8> {
-		let mut out = self.formed;
-
-		if self.index != 0 {
-			out.push(self.current);
-		}
-
-		out
-	}
+    fn new() -> Self {
+        Self {
+            formed: vec![],
+            current: 0,
+            index: 0,
+        }
+    }
+
+    fn push_bits(&mut self, count: u8, data: u16) {
+        let mut new_index = self.index + count;
+        let mut current32 = (self.current as u32) | ((data as u32) << self.index);
+
+        loop {
+            if new_index >= 8 {
+                self.formed.push(current32 as u8);
+                current32 = current32 >> 8;
+                new_index -= 8;
+            } else {
+                self.current = current32 as u8;
+                self.index = new_index;
+
+                break;
+            }
+        }
+    }
+
+    fn vec(self) -> Vec<u8> {
+        let mut out = self.formed;
+
+        if self.index != 0 {
+            out.push(self.current);
+        }
+
+        out
+    }
 }
 
 #[cfg(test)]
 mod bitstream_test {
-	use super::*;
-
-	#[test]
-	fn short_push() {
-		let mut bs = BitStream::new();
-		bs.push_bits(2, 3);
-		bs.push_bits(2, 3);
-		bs.push_bits(3, 1);
-		bs.push_bits(2, 3);
-
-		let bsvec = bs.vec();
-
-		for byte in &bsvec {
-			print!("{:b} ", byte);
-		}
-		println!("");
-
-		assert_eq!(bsvec, vec![0b1001_1111, 0b0000_0001]);
-	}
-
-	#[test]
-	fn long_push() {
-		let mut bs = BitStream::new();
-		bs.push_bits(1, 1);
-		bs.push_bits(12, 2049);
-
-		let bsvec = bs.vec();
-
-		for byte in &bsvec {
-			print!("{:b} ", byte);
-		}
-		println!("");
-
-		assert_eq!(bsvec, vec![0b0000_0011, 0b0001_0000]);
-	}
-}
\ No newline at end of file
+    use super::*;
+
+    #[test]
+    fn short_push() {
+        let mut bs = BitStream::new();
+        bs.push_bits(2, 3);
+        bs.push_bits(2, 3);
+        bs.push_bits(3, 1);
+        bs.push_bits(2, 3);
+
+        let bsvec = bs.vec();
+
+        for byte in &bsvec {
+            print!("{:b} ", byte);
+        }
+        println!("");
+
+        assert_eq!(bsvec, vec![0b1001_1111, 0b0000_0001]);
+    }
+
+    #[test]
+    fn long_push() {
+        let mut bs = BitStream::new();
+        bs.push_bits(1, 1);
+        bs.push_bits(12, 2049);
+
+        let bsvec = bs.vec();
+
+        for byte in &bsvec {
+            print!("{:b} ", byte);
+        }
+        println!("");
+
+        assert_eq!(bsvec, vec![0b0000_0011, 0b0001_0000]);
+    }
+}