// src/lzw.rs

use std::collections::HashMap;

/// Stateless namespace for the LZW encoder.
pub struct LZW {}
impl LZW {
	/// Builds the starting dictionary: every single-byte sequence `0..cc` maps to
	/// the code with the same numeric value.
	fn initial_dictionary(cc: u16) -> HashMap<Vec<u8>, u16> {
		(0..cc).map(|value| (vec![value as u8], value)).collect()
	}

	/// Compresses `indicies` into a variable-width LZW code stream.
	///
	/// The stream starts with the clear code (`2^minimum_size`), continues with
	/// codes that are initially `minimum_size + 1` bits wide, and ends with the
	/// end-of-information code (`clear + 1`). Codes are packed through
	/// `BitStream`, and the final partial byte (if any) is zero-padded.
	///
	/// Codes never become wider than 12 bits: once code 4095 has been assigned,
	/// the next emitted code is followed by a clear code and the dictionary and
	/// code width start over.
	///
	/// An empty `indicies` produces just the clear code followed by the
	/// end-of-information code.
	///
	/// `minimum_size` is expected to be at most 8, because the initial
	/// dictionary keys are single bytes.
	///
	/// # Panics
	///
	/// Panics if any index is not smaller than `2^minimum_size`, since such a
	/// value has no entry in the initial dictionary.
	pub fn encode(minimum_size: u8, indicies: &[u8]) -> Vec<u8> {
		// Widest code the stream may contain; the table is reset instead of
		// growing past it.
		const MAX_CODE_SIZE: u8 = 12;

		let cc = 2u16.pow(minimum_size as u32);
		let eoi = cc + 1;
		let first_free_code = eoi + 1;
		let initial_code_size = minimum_size + 1;
		let table_limit = 1u16 << MAX_CODE_SIZE;

		let mut dictionary = Self::initial_dictionary(cc);
		let mut next_code = first_free_code;
		let mut code_size = initial_code_size;

		let mut out = BitStream::new();
		out.push_bits(code_size, cc);

		let mut iter = indicies.iter();
		let mut buffer = match iter.next() {
			Some(&first) => vec![first],
			None => {
				// Nothing to compress: close the stream right after the clear code.
				out.push_bits(code_size, eoi);
				return out.vec();
			}
		};

		for &indicie in iter {
			buffer.push(indicie);

			// Keep extending the match while the sequence is already known.
			if dictionary.contains_key(&buffer) {
				continue;
			}

			// The longest known sequence is the buffer without the newest index.
			buffer.pop();
			let code = match dictionary.get(&buffer) {
				Some(&code) => code,
				None => panic!("index is not part of the initial dictionary (must be < 2^minimum_size)"),
			};
			out.push_bits(code_size, code);

			if next_code >= table_limit {
				// Table is full: tell the decoder to start over and do the same here.
				out.push_bits(code_size, cc);
				dictionary = Self::initial_dictionary(cc);
				next_code = first_free_code;
				code_size = initial_code_size;
			} else {
				// Register the known sequence extended by the newest index.
				buffer.push(indicie);
				dictionary.insert(buffer.clone(), next_code);

				// The code just assigned no longer fits in `code_size` bits, so
				// every following code is written one bit wider.
				if next_code == 1u16 << code_size {
					code_size += 1;
				}
				next_code += 1;
			}

			// The newest index starts the next match.
			buffer.clear();
			buffer.push(indicie);
		}

		// `buffer` always holds at least one index here; flush its code.
		match dictionary.get(&buffer) {
			Some(&code) => out.push_bits(code_size, code),
			None => {
				panic!("Codes left in the buffer but the buffer is not a valid dictionary key!")
			}
		}
		out.push_bits(code_size, eoi);

		out.vec()
	}
}

#[cfg(test)]
mod lzw_test {
	use super::*;

	/// Sixteen sample indices shared by both tests.
	const SAMPLE: [u8; 16] = [0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0];

	/// The encoder must reproduce a fixed, known byte sequence for the sample.
	#[test]
	fn encode() {
		let expected: Vec<u8> = vec![0x84, 0x1D, 0x81, 0x7A, 0x50];

		let actual = LZW::encode(2, &SAMPLE);

		assert_eq!(actual, expected);
	}

	/// The encoder must agree byte-for-byte with `weezl` in LSB bit order.
	#[test]
	fn against_weezl() {
		let mut reference_encoder = weezl::encode::Encoder::new(weezl::BitOrder::Lsb, 2);
		let reference = reference_encoder.encode(&SAMPLE).unwrap();

		let ours = LZW::encode(2, &SAMPLE);

		assert_eq!(reference, ours);
	}
}

/// Accumulates values of arbitrary bit width into a byte vector, filling each
/// byte from its least significant bit upwards.
struct BitStream {
	/// Bytes that are already complete.
	formed: Vec<u8>,
	/// The byte currently being filled; bits at and above `index` are zero.
	current: u8,
	/// Number of bits already used in `current` (always below 8).
	index: u8,
}

impl BitStream {
	/// Creates an empty stream.
	fn new() -> Self {
		Self {
			formed: vec![],
			current: 0,
			index: 0,
		}
	}

	/// Appends the lowest `count` bits of `data` to the stream.
	///
	/// Bits of `data` above `count` are discarded so they cannot spill into
	/// bits written by later calls.
	fn push_bits(&mut self, count: u8, data: u16) {
		// Mask selecting the low `count` bits; a shift that would overflow
		// simply keeps every bit of `data`.
		let mask = 1u32
			.checked_shl(u32::from(count))
			.map_or(u32::MAX, |bit| bit - 1);

		// Place the new bits directly above the ones already pending.
		let mut pending = u32::from(self.current) | ((u32::from(data) & mask) << self.index);
		let mut pending_bits = self.index + count;

		// Move every completed byte out, lowest byte first.
		while pending_bits >= 8 {
			self.formed.push(pending as u8);
			pending >>= 8;
			pending_bits -= 8;
		}

		self.current = pending as u8;
		self.index = pending_bits;
	}

	/// Consumes the stream and returns its bytes; a partially filled last byte
	/// is included with its unused high bits left at zero.
	fn vec(self) -> Vec<u8> {
		let mut out = self.formed;

		if self.index != 0 {
			out.push(self.current);
		}

		out
	}
}

#[cfg(test)]
mod bitstream_test {
	use super::*;

	/// Prints each byte in binary followed by a space, then ends the line.
	fn dump(bytes: &[u8]) {
		bytes.iter().for_each(|byte| print!("{:b} ", byte));
		println!("");
	}

	/// Several narrow values that together cross one byte boundary.
	#[test]
	fn short_push() {
		let pushes: [(u8, u16); 4] = [(2, 3), (2, 3), (3, 1), (2, 3)];

		let mut stream = BitStream::new();
		for &(count, data) in pushes.iter() {
			stream.push_bits(count, data);
		}
		let packed = stream.vec();

		dump(&packed);

		assert_eq!(packed, vec![0b1001_1111, 0b0000_0001]);
	}

	/// A single wide value that starts mid-byte and spills into the next byte.
	#[test]
	fn long_push() {
		let pushes: [(u8, u16); 2] = [(1, 1), (12, 2049)];

		let mut stream = BitStream::new();
		for &(count, data) in pushes.iter() {
			stream.push_bits(count, data);
		}
		let packed = stream.vec();

		dump(&packed);

		assert_eq!(packed, vec![0b0000_0011, 0b0001_0000]);
	}
}