src/lzw.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

use std::collections::HashMap;

pub struct LZW {}
impl LZW {
	//TODO: Cleanup and make not awful
	pub fn encode(minimum_size: u8, indicies: &[u8]) -> Vec<u8> {
		let mut dictionary: HashMap<Vec<u8>, u16> = HashMap::new();

		let cc: u16 = 2u16.pow(minimum_size as u32);
		let eoi: u16 = cc+1;

		let mut index: u16 = eoi+1; // Next code number
		let mut codesize: u8 = minimum_size+1; // Current code size

		//println!("cc: {} - eoi: {}", cc, eoi);

		// Populate starting codes
		for code in 0..cc {
			dictionary.insert(vec![code as u8], code);
		}

		let mut iter = indicies.iter();
		let mut out = BitStream::new();
		let mut buffer = vec![*iter.next().unwrap()]; //TODO: Remove unwrap

		//"Encoders should output a Clear code as the first code of each image data stream."
		out.push_bits(codesize, cc);

		//println!("Before Loop\n\tBuffer: {:?}\n\tCodesize:{}\n\tIndex:{}", buffer, codesize, index);

		for next_code in iter {
			buffer.push(*next_code);

			//println!("Buffer: {:?} - Codesize:{} - Index:{}\n\tDict: {:?}", buffer, codesize, index, dictionary);

			match dictionary.get(&buffer) {
				Some(_code) => {
					//println!("\tPresent!");
					continue;
				},
				None => {
					buffer.pop();
					match dictionary.get(&buffer) {
						Some(code) => {
							out.push_bits(codesize, *code);
							//println!("\tOutputting {} with {} bits. Buffer is {:?}", *code, codesize, buffer);

							// Add new entry for buffer and increase the index
							buffer.push(*next_code);
							dictionary.insert(buffer, index);
							index += 1;

							// Increase code size if we should
							if index-1 == 2u16.pow(codesize as u32) {
								codesize += 1;
							}

							// Reset the buffer to only contain next_code
							buffer = vec![*next_code];
						},
						None => panic!("No dictionary entry when there should be! Something is wrong!")
					}
				}
			}
		}

		if buffer.len() > 0 {
			match dictionary.get(&buffer) {
				None => panic!("No codes left but not in the dictionary!"),
				Some(code) => out.push_bits(codesize, *code)
			}
		}
		out.push_bits(codesize, eoi);

		out.vec()
	}
}

#[cfg(test)]
mod lzw_test {
	use super::*;

	#[test]
	fn encode() {
		let indicies = vec![0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0];
		let output = vec![0x84, 0x1D, 0x81, 0x7A, 0x50];

		let lzout = LZW::encode(2, &indicies);

		assert_eq!(lzout, output);
	}
}

struct BitStream {
	formed: Vec<u8>,
	current: u8,
	index: u8
}

impl BitStream {
	fn new() -> Self {
		Self {
			formed: vec![],
			current: 0,
			index: 0
		}
	}

	fn push_bits(&mut self, count: u8, data: u16) {
		let mut new_index = self.index + count;
		let mut current32 = (self.current as u32) | ((data as u32) << self.index);

		loop {
			if new_index >= 8 {
				self.formed.push(current32 as u8);
				current32 = current32 >> 8;
				new_index -= 8;
			} else {
				self.current = current32 as u8;
				self.index = new_index;

				break;
			}
		}
	}

	fn vec(self) -> Vec<u8> {
		let mut out = self.formed;

		if self.index != 0 {
			out.push(self.current);
		}

		out
	}
}

#[cfg(test)]
mod bitstream_test {
	use super::*;

	#[test]
	fn short_push() {
		let mut bs = BitStream::new();
		bs.push_bits(2, 3);
		bs.push_bits(2, 3);
		bs.push_bits(3, 1);
		bs.push_bits(2, 3);

		let bsvec = bs.vec();

		for byte in &bsvec {
			print!("{:b} ", byte);
		}
		println!("");

		assert_eq!(bsvec, vec![0b1001_1111, 0b0000_0001]);
	}

	#[test]
	fn long_push() {
		let mut bs = BitStream::new();
		bs.push_bits(1, 1);
		bs.push_bits(12, 2049);

		let bsvec = bs.vec();

		for byte in &bsvec {
			print!("{:b} ", byte);
		}
		println!("");

		assert_eq!(bsvec, vec![0b0000_0011, 0b0001_0000]);
	}
}