about summary refs log tree commit diff
path: root/gaudio/src/mp3/mod.rs
blob: fe6433f8df30c4c2874fc7f9e169dd98af1793d2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
use std::{
	io::{BufRead, BufReader, Cursor, ErrorKind, Read},
	time::Duration,
};

use crate::mp3::bitrate::Bitrate;

mod bitrate;

/// Destroy an MP3, ripping its frames apart. Also removes any ID3v2 tags
/// because who needs metadata?
///
/// Feed raw MP3 bytes to [`Breaker::split`]; every frame it parses is
/// appended to [`Breaker::frames`].
pub struct Breaker {
	/// Frames collected so far, in the order they were read from the input.
	pub frames: Vec<Frame>,
}

impl Breaker {
	/// Create a `Breaker` that does not hold any frames yet.
	pub fn new() -> Self {
		let frames = Vec::new();
		Self { frames }
	}

	/// Split `data` into MP3 frames, appending each one to `self.frames`.
	///
	/// The input is walked from its first byte. An `ID3` marker causes the
	/// ID3v2 tag that follows to be skipped; a frame sync (`0xFF` followed by
	/// three set bits) causes the four byte header and the data it describes
	/// to be read into a [`Frame`]. Reading ends cleanly once fewer than three
	/// bytes are left. Progress is logged to stdout.
	///
	/// # Errors
	///
	/// - [`ErrorKind::InvalidData`] if a frame header is rejected by
	///   [`Header::from_bytes`], or if the bytes at a frame boundary are
	///   neither an ID3v2 marker nor a frame sync. Frames parsed before the
	///   error stay in `self.frames`.
	/// - [`ErrorKind::UnexpectedEof`] if the input ends in the middle of a
	///   header, a frame's data or an ID3v2 tag.
	/// - Any other I/O error reported by the reader.
	///
	/// # Panics
	///
	/// NOTE(review): the `unwrap`/`panic!` calls reached through
	/// `Header::data_length`, `Frame::duration` and the `kbps()` call in the
	/// log line below can still panic. Whether `Header::from_bytes` already
	/// rules those cases out depends on `Bitrate::resolve`, which lives in the
	/// `bitrate` module — confirm there.
	pub fn split(&mut self, data: Vec<u8>) -> Result<(), std::io::Error> {
		let cursor = Cursor::new(data);
		let mut reader = BufReader::new(cursor);

		// Running byte offset into `data`; only used for logging and errors.
		let mut consumed = 0;
		loop {
			print!("[{consumed:06X}] reading... ");
			let mut three = [0x00, 0x00, 0x00];
			if let Err(e) = reader.read_exact(&mut three) {
				if e.kind() == ErrorKind::UnexpectedEof {
					// Nothing (or only a stray byte or two) left: we are done.
					println!("out of bytes!");
					break;
				} else {
					println!("failed!");
					return Err(e);
				}
			}
			consumed += 3;

			if &three == b"ID3" {
				println!("found ID3v2!");
				Self::skip_id3v2(&mut reader, &mut consumed)?;
			} else if three[0] == 0xFF && three[1] & 0b1110_0000 == 0b1110_0000 {
				print!("Have header - ");
				let mut one_more = [0x00];
				reader.read_exact(&mut one_more)?;
				consumed += 1;

				// A malformed header is bad input, not a bug in this program:
				// report it to the caller instead of panicking.
				let raw = [three[0], three[1], three[2], one_more[0]];
				let header = match Header::from_bytes(raw) {
					Ok(header) => header,
					Err(e) => {
						println!("bad header!");
						return Err(std::io::Error::new(ErrorKind::InvalidData, e));
					}
				};

				let dat_len = header.data_length();
				let mut data = vec![0; dat_len];
				reader.read_exact(&mut data)?;
				consumed += dat_len;
				let frame = Frame { header, data };

				println!(
					"{}kbps {}kHz {:<4}bytes [{}ms]",
					frame.header.bitrate.kbps().unwrap(),
					frame.header.samplerate.freq() / 1000,
					frame.header.length(),
					frame.duration().as_millis()
				);

				self.frames.push(frame);
			} else {
				// Neither a tag nor a frame sync: the stream is not where we
				// expected a frame boundary to be.
				println!("unsynced!");
				return Err(std::io::Error::new(
					ErrorKind::InvalidData,
					format!("lost frame sync at offset {:#X}", consumed - 3),
				));
			}
		}

		Ok(())
	}

	/// Skip the remainder of an ID3v2 tag. Assumes the ident "ID3" was already
	/// consumed.
	///
	/// Reads the version, revision and flags bytes followed by the four byte
	/// syncsafe size, then discards that many bytes of tag body. When an
	/// ID3v2.4 header declares a footer (flag bit 4), the ten byte footer after
	/// the body is discarded too, so the reader is left on the first byte after
	/// the tag. `consumed` is advanced by every byte that was read.
	///
	/// # Errors
	///
	/// Returns the underlying I/O error if the reader fails, and an
	/// [`ErrorKind::UnexpectedEof`] error if the input ends before the whole
	/// tag has been skipped.
	fn skip_id3v2<R: BufRead>(reader: &mut R, consumed: &mut usize) -> Result<(), std::io::Error> {
		// Bit 4 of the flags byte: "footer present" (ID3v2.4 and later).
		const FOOTER_FLAG: u8 = 0b0001_0000;
		// The footer has the same fixed size as the tag header.
		const FOOTER_LEN: usize = 10;

		// We don't actually want this, but want to get rid of it.
		let mut version_and_flags = [0x00, 0x00, 0x00];
		reader.read_exact(&mut version_and_flags)?;
		*consumed += 3;
		let [version, revision, flags] = version_and_flags;

		println!("Version {version} Revision {revision}");

		let mut syncsafe_size = [0x00, 0x00, 0x00, 0x00];
		reader.read_exact(&mut syncsafe_size)?;
		*consumed += 4;

		// The size is stored most significant byte first with only seven useful
		// bits per byte; the top bit of each byte is always zero ("syncsafe").
		// Mask it anyway so a malformed byte cannot bleed into its neighbour.
		let size = syncsafe_size
			.iter()
			.fold(0u32, |acc, &byte| (acc << 7) | u32::from(byte & 0x7F));

		let human = if size > 1024 * 1024 {
			format!("{:.2}MiB", size as f32 / (1024.0 * 1024.0))
		} else if size > 1024 {
			format!("{:.2}KiB", size as f32 / 1024.0)
		} else {
			format!("{size}B")
		};

		// `human` already carries its unit.
		println!("ID3v2 size is {human}");

		// The declared size excludes the header and the optional footer.
		let has_footer = version >= 4 && flags & FOOTER_FLAG != 0;
		let to_skip = size as usize + if has_footer { FOOTER_LEN } else { 0 };

		// Drain the bytes instead of buffering them: the size comes straight
		// from the file and can claim up to 256MiB.
		let skipped = std::io::copy(
			&mut reader.by_ref().take(to_skip as u64),
			&mut std::io::sink(),
		)?;
		if skipped != to_skip as u64 {
			return Err(std::io::Error::new(
				ErrorKind::UnexpectedEof,
				"ID3v2 tag is truncated",
			));
		}
		*consumed += to_skip;

		Ok(())
	}
}

/// A single audio frame: the parsed header plus the bytes that followed it.
pub struct Frame {
	/// The parsed four byte frame header.
	pub header: Header,
	/// The bytes read after the header; `Breaker::split` reads
	/// `Header::data_length` of them.
	pub data: Vec<u8>,
}

impl Frame {
	/// How many samples (moments-in-time) this frame carries. The count is
	/// fixed per [Layer].
	///
	/// NOTE(review): the figures below come from the page cited in the body;
	/// MPEG-2/2.5 Layer III may use a different count — confirm before relying
	/// on this for anything but MPEG-1.
	///
	/// # Panics
	///
	/// Panics if the header's layer is [`Layer::Reserved`].
	pub fn sample_count(&self) -> usize {
		// Source: http://www.datavoyage.com/mpgscript/mpeghdr.htm
		// "Frame size is the number of samples contained in a frame. It is
		// constant and always 384 samples for Layer I and 1152 samples for
		// Layer II and Layer III."
		const LAYER1_SAMPLES: usize = 384;
		const LAYER2_AND_3_SAMPLES: usize = 1152;

		match self.header.layer {
			Layer::Layer1 => LAYER1_SAMPLES,
			Layer::Layer2 | Layer::Layer3 => LAYER2_AND_3_SAMPLES,
			Layer::Reserved => panic!(),
		}
	}

	/// Compute the duration of this audio frame.
	///
	/// This is `sample_count / sample rate`, kept at nanosecond resolution.
	/// Rounding down to whole milliseconds would turn the ~26.12ms of a 1152
	/// sample frame at 44.1kHz into 26ms, an error that accumulates when the
	/// durations of many frames are summed.
	///
	/// # Panics
	///
	/// Panics under the same conditions as [`Frame::sample_count`] and
	/// [`SampleRate::freq`] (reserved layer or sample rate).
	pub fn duration(&self) -> Duration {
		const NANOS_PER_SEC: u64 = 1_000_000_000;

		let samples = self.sample_count() as u64;
		let freq = self.header.samplerate.freq() as u64;
		// Multiply first so the integer division only rounds once, at the
		// nanosecond level. 1152 * 1e9 is far below u64::MAX.
		Duration::from_nanos(samples * NANOS_PER_SEC / freq)
	}
}

/// The parsed form of a four byte frame header. Only the fields this module
/// needs are decoded; see [`Header::from_bytes`].
pub struct Header {
	/// The four header bytes exactly as they were read. Not everything is
	/// parsed, so the raw bytes are kept for writing the header back out.
	pub raw: [u8; 4],
	/// Version bits decoded from the second header byte.
	pub version: Version,
	/// Layer bits decoded from the second header byte.
	pub layer: Layer,
	/// `true` when the lowest bit of the second header byte is clear, which
	/// the parser treats as "a 2 byte CRC directly follows the header".
	pub crc: bool,
	/// Bitrate resolved from the third header byte together with the version
	/// and layer.
	pub bitrate: Bitrate,
	/// Sample rate bits decoded from the third header byte.
	pub samplerate: SampleRate,
	/// `true` when the pad bit (`0b10` of the third header byte) is set; adds
	/// one byte to [`Header::length`].
	pub pad: bool,
}

impl Header {
	pub fn from_bytes(raw: [u8; 4]) -> Result<Self, Error> {
		if raw[0] != 0xFF || raw[1] & 0b1110_0000 != 0b1110_0000 {
			return Err(Error::HeaderUnsync);
		}

		//TODO: gen- yell if the version and layer aren't V1 L3?
		let version = Version::from_packed(raw[1]);
		let layer = Layer::from_packed(raw[1]);
		// CRC is 2bytes and directly follows the frame header
		let crc = raw[1] & 1 == 0;
		let bitrate = Bitrate::resolve(raw[2], version, layer)?;
		let samplerate = SampleRate::from_packed(raw[2]);

		if let SampleRate::Reserved = samplerate {
			return Err(Error::SampleRateReserve);
		}

		let pad = raw[2] & 2 > 0;

		//TODO: gen- love, you were trying to get the size of the data field. We need
		//to know the sampling rate and the pad bit for that, which happen to be the
		//next three bits.

		//Things i did not parse because i do not care about them:
		// - private bit
		// - channels
		// - mode extension
		// - copyright (lol)
		// - original (lmfao)
		// - emphasis

		Ok(Self {
			raw,
			version,
			layer,
			crc,
			bitrate,
			samplerate,
			pad,
		})
	}

	// Algorithm taken from:
	// http://www.multiweb.cz/twoinches/mp3inside.htm
	/// The length of the header and data, in bytes.
	///
	/// Computed as `144 * bitrate / sample rate` (integer division), plus one
	/// byte when the pad bit is set.
	///
	/// # Panics
	///
	/// Panics if `Bitrate::bitrate` yields nothing to unwrap, or if the sample
	/// rate is [`SampleRate::Reserved`].
	pub fn length(&self) -> usize {
		// NOTE(review): open question from the original author — does a CRC
		// add 2 bytes on top of this? The formula is used as published.
		let unpadded = (144 * self.bitrate.bitrate().unwrap()) / self.samplerate.freq();
		unpadded + usize::from(self.pad)
	}

	/// The number of bytes that follow the four byte header: [`Header::length`]
	/// minus the header itself.
	pub fn data_length(&self) -> usize {
		const HEADER_LEN: usize = 4;

		let frame_len = self.length();
		frame_len - HEADER_LEN
	}
}

/// Reasons a frame header can be rejected while parsing.
#[derive(Debug, thiserror::Error)]
pub enum Error {
	/// The header did not start with eleven set bits. Returned by
	/// `Header::from_bytes`.
	#[error("tried to parse header, but first 11 bits were not 1; not synced!")]
	HeaderUnsync,
	/// NOTE(review): not constructed in this file; presumably raised by
	/// `Bitrate::resolve` in the `bitrate` module — confirm there.
	#[error("The version or the layer was a reserved value")]
	BitrateReserve,
	/// NOTE(review): not constructed in this file; presumably raised by
	/// `Bitrate::resolve` in the `bitrate` module — confirm there.
	#[error("Bitrate bits were all 1")]
	BitrateBad,
	/// The sample rate bits held the reserved pattern. Returned by
	/// `Header::from_bytes`.
	#[error("SampleRate was a reserved value")]
	SampleRateReserve,
}

/// The version field of a frame header, a two bit value in the second header
/// byte.
#[derive(Copy, Clone, Debug)]
pub enum Version {
	/// Bit pattern `00`.
	Mpeg2_5,
	/// Bit pattern `01`.
	Reserved,
	/// Bit pattern `10`.
	Mpeg2,
	/// Bit pattern `11`.
	Mpeg1,
}

impl Version {
	/// Decode the Version from the second byte of the frame header. Only bits
	/// 3 and 4 (mask `0b0001_1000`) are looked at; every other bit is ignored.
	fn from_packed(byte: u8) -> Self {
		// Move the two version bits down to the bottom and drop the rest.
		let bits = (byte >> 3) & 0b11;
		match bits {
			0b00 => Self::Mpeg2_5,
			0b01 => Self::Reserved,
			0b10 => Self::Mpeg2,
			0b11 => Self::Mpeg1,
			// A two bit value cannot be anything else.
			_ => unreachable!(),
		}
	}
}

/// The layer field of a frame header, a two bit value in the second header
/// byte.
#[derive(Copy, Clone, Debug)]
pub enum Layer {
	/// Bit pattern `00`.
	Reserved,
	/// Bit pattern `01`.
	Layer3,
	/// Bit pattern `10`.
	Layer2,
	/// Bit pattern `11`.
	Layer1,
}

impl Layer {
	/// Decode the Layer from the second byte of the frame header. Only bits 1
	/// and 2 (mask `0b0000_0110`) are looked at; every other bit is ignored.
	fn from_packed(byte: u8) -> Self {
		// Move the two layer bits down to the bottom and drop the rest.
		let bits = (byte >> 1) & 0b11;
		match bits {
			0b00 => Self::Reserved,
			0b01 => Self::Layer3,
			0b10 => Self::Layer2,
			0b11 => Self::Layer1,
			// A two bit value cannot be anything else.
			_ => unreachable!(),
		}
	}
}

/// The sample rate field of a frame header, a two bit value in the third
/// header byte.
#[derive(Copy, Clone, Debug)]
pub enum SampleRate {
	/// Bit pattern `00`.
	Hz44100,
	/// Bit pattern `01`.
	Hz48000,
	/// Bit pattern `10`.
	Hz32000,
	/// Bit pattern `11`; has no frequency.
	Reserved,
}

impl SampleRate {
	/// Decode the SampleRate from the third byte of the frame header. Only
	/// bits 2 and 3 (mask `0b0000_1100`) are looked at; every other bit is
	/// ignored.
	fn from_packed(byte: u8) -> Self {
		// Move the two sample rate bits down to the bottom and drop the rest.
		let bits = (byte >> 2) & 0b11;
		match bits {
			0b00 => Self::Hz44100,
			0b01 => Self::Hz48000,
			0b10 => Self::Hz32000,
			0b11 => Self::Reserved,
			// A two bit value cannot be anything else.
			_ => unreachable!(),
		}
	}

	/// The sample rate in Hz.
	///
	/// # Panics
	///
	/// Panics when called on [`SampleRate::Reserved`].
	pub fn freq(&self) -> usize {
		match *self {
			Self::Hz44100 => 44_100,
			Self::Hz48000 => 48_000,
			Self::Hz32000 => 32_000,
			Self::Reserved => {
				panic!("sample rate was a reserved value; unable to determien a frequency")
			}
		}
	}
}