about summary refs log tree commit diff
path: root/src/markup.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/markup.rs')
-rw-r--r-- src/markup.rs 218
1 files changed, 218 insertions, 0 deletions
diff --git a/src/markup.rs b/src/markup.rs
new file mode 100644
index 0000000..4e0d66e
--- /dev/null
+++ b/src/markup.rs
@@ -0,0 +1,218 @@
+/// Line-by-line markup processor.
+///
+/// Lines are fed in through [`State::process_line`]; consecutive non-blank
+/// lines are gathered into a block which is flushed into the output
+/// (optionally wrapped in `<p>` tags) on a blank line, on a paragraph-mode
+/// toggle, or when [`State::done`] is called.
+#[derive(Debug)]
+struct State {
+	/// `id` attribute for the next wrapped paragraph, set by a `[#id]` command.
+	active_id: Option<String>,
+	/// Whether blocks are currently being wrapped in `<p>` tags.
+	paragraphs: bool,
+
+	/// Output produced so far.
+	processed: String,
+	/// The block currently being accumulated.
+	current: String,
+	/// Set when the first line of the current block began with an escaped
+	/// `<`; forces paragraph wrapping even though the block starts with `<`.
+	escaped_html: bool,
+	/// True while the next line would start a new block (at the start of
+	/// input and after every flush). Commands are only recognised then.
+	last_blank: bool,
+}
+
+impl Default for State {
+	/// Start with paragraph wrapping enabled and positioned at a block start.
+	fn default() -> Self {
+		Self {
+			active_id: None,
+			paragraphs: true,
+
+			processed: String::new(),
+			current: String::new(),
+			escaped_html: false,
+			last_blank: true,
+		}
+	}
+}
+
+impl State {
+	/// Get an opening paragraph tag with any attributes currently set.
+	///
+	/// Consumes `active_id`, so an id applies to a single paragraph only.
+	/// NOTE: the id is inserted verbatim; it is not HTML-escaped.
+	fn get_open_paragraph(&mut self) -> String {
+		match self.active_id.take() {
+			None => String::from("<p>"),
+			Some(id) => format!(r#"<p id="{id}">"#),
+		}
+	}
+
+	/// Feed one line (without its trailing newline) into the processor.
+	pub fn process_line(&mut self, line: &str) {
+		// we check !paragraphs here because we need to be able to enable it again
+		// and the easiest way right now seems to be to try to parse every
+		// non-paragraph line as a command
+		if (self.last_blank || !self.paragraphs) && self.parse_command(line) {
+			// don't touch last_blank here. we want to be able to chain commands
+			return;
+		}
+
+		if self.paragraphs && line.is_empty() {
+			// A blank line ends the block. `push_current` marks the next line
+			// as a block start, and that must not be undone here, otherwise
+			// commands following a blank line would never be recognised.
+			self.push_current();
+			return;
+		}
+
+		if !self.current.is_empty() {
+			self.current.push('\n');
+		}
+
+		let escaped = self.escape_line(line);
+		self.current.push_str(escaped);
+		self.last_blank = false;
+	}
+
+	/// Flush any pending block and return the finished output.
+	pub fn done(mut self) -> String {
+		self.push_current();
+		self.processed
+	}
+
+	/// Strip a leading backslash when it escapes `[`, `<` or `\`.
+	///
+	/// An escaped `<` on the first line of a block also records that the
+	/// block should still be wrapped in a paragraph. Any other line,
+	/// including one starting with a backslash that escapes nothing, is
+	/// returned unchanged.
+	fn escape_line<'a>(&mut self, line: &'a str) -> &'a str {
+		let rest = match line.strip_prefix('\\') {
+			Some(rest) => rest,
+			None => return line,
+		};
+
+		// Inspect the character *after* the backslash; `line` itself always
+		// starts with `\` at this point.
+		match rest.chars().next() {
+			Some('[' | '\\') => rest,
+			Some('<') => {
+				if self.last_blank {
+					self.escaped_html = true;
+				}
+
+				rest
+			}
+			_ => line,
+		}
+	}
+
+	/// Possibly parses a line as a command and mutates internal state.
+	/// # Returns
+	/// true if the line was a command, false otherwise.
+	fn parse_command(&mut self, line: &str) -> bool {
+		line.strip_prefix('[')
+			.and_then(|rest| rest.strip_suffix(']'))
+			.map_or(false, |cmd| self.run_command(cmd))
+	}
+
+	/// Execute the text found between `[` and `]`.
+	///
+	/// Surrounding whitespace is ignored for every command.
+	/// # Returns
+	/// true if `cmd` was a known command, false otherwise.
+	fn run_command(&mut self, cmd: &str) -> bool {
+		let cmd = cmd.trim();
+
+		match cmd {
+			"@paragraphs off" => {
+				self.push_current();
+				self.paragraphs = false;
+				true
+			}
+			"@paragraphs on" => {
+				self.push_current();
+				self.paragraphs = true;
+				true
+			}
+			_ => match cmd.strip_prefix('#') {
+				Some(id) => {
+					self.active_id = Some(id.to_owned());
+					true
+				}
+				None => false,
+			},
+		}
+	}
+
+	/// Flush the current block into the output and reset per-block state.
+	fn push_current(&mut self) {
+		if !self.current.is_empty() {
+			// linebreak if there is already text pushed to final
+			if !self.processed.is_empty() {
+				self.processed.push('\n');
+			}
+
+			// wrap only when paragraphs are enabled and the block either does
+			// not start with `<` or starts with a `<` that was escaped
+			let should_paragraph =
+				self.paragraphs && (self.escaped_html || !self.current.starts_with('<'));
+
+			if should_paragraph {
+				let open = self.get_open_paragraph();
+				self.processed.push_str(&open);
+				self.processed.push('\n');
+				self.processed.push_str(&self.current);
+				self.processed.push_str("\n</p>");
+			} else {
+				self.processed.push_str(&self.current);
+			}
+
+			self.current.clear();
+		}
+
+		// reset block dependent state, even when nothing was buffered: every
+		// caller is at a block boundary
+		self.last_blank = true;
+		self.escaped_html = false;
+	}
+}
+
+/// Run `raw` through the markup processor and return the resulting text.
+///
+/// The input is split with `str::lines`, each line is handed to
+/// `State::process_line` in order, and `State::done` produces the output.
+pub fn process(raw: &str) -> String {
+	raw.lines()
+		.fold(State::default(), |mut state, line| {
+			state.process_line(line);
+			state
+		})
+		.done()
+}
+
+#[cfg(test)]
+mod test {
+	use std::path::Path;
+
+	use crate::markup::process;
+
+	/// Plain text is split into paragraphs on blank lines.
+	#[test]
+	fn parses_no_commands() {
+		let blk1 = "line one\nline two";
+		let blk2 = "block two";
+
+		assert_eq!(process(blk1), format!("<p>\n{blk1}\n</p>"));
+
+		let tst = format!("{blk1}\n\n{blk2}");
+		assert_eq!(
+			process(&tst),
+			format!("<p>\n{blk1}\n</p>\n<p>\n{blk2}\n</p>")
+		);
+	}
+
+	/// With paragraphs turned off the text is passed through untouched.
+	#[test]
+	fn parses_paragraph_off() {
+		let str = "[@paragraphs off]\none two\n\nthree\nfour";
+		assert_eq!(process(str), "one two\n\nthree\nfour");
+	}
+
+	/// A `[#id]` command puts an `id` attribute on the following paragraph.
+	#[test]
+	fn parses_adds_annotation() {
+		let str = "[#greeting]\nHello!";
+		assert_eq!(process(str), "<p id=\"greeting\">\nHello!\n</p>");
+	}
+
+	/// A block that starts with `<` is emitted without a `<p>` wrapper.
+	#[test]
+	fn doesnt_wrap_html() {
+		let str = "hello!\n\n<i>hi, how are you?</i>";
+		assert_eq!(process(str), "<p>\nhello!\n</p>\n<i>hi, how are you?</i>");
+	}
+
+	/// A leading backslash escapes `[`, `<` and `\`.
+	#[test]
+	fn correctly_escapes() {
+		let str = "\\[@paragraph on]\n\\<i>Hello!</i>\n\\\\Goodbye!";
+		let correct = "<p>\n[@paragraph on]\n<i>Hello!</i>\n\\Goodbye!\n</p>";
+		assert_eq!(process(str), correct);
+	}
+
+	/// Directory holding the fixture-based tests, relative to the working
+	/// directory the tests are run from.
+	const BASE: &str = "test/markup";
+
+	/// Read a fixture file, panicking with the offending path on failure.
+	fn read_fixture(path: &Path) -> String {
+		std::fs::read_to_string(path)
+			.unwrap_or_else(|err| panic!("failed to read fixture {}: {err}", path.display()))
+	}
+
+	/// Process `BASE/<test>/input.html` and compare the result against
+	/// `BASE/<test>/output.html`.
+	fn test_files(test: &str) {
+		let dir = Path::new(BASE).join(test);
+
+		let input = read_fixture(&dir.join("input.html"));
+		let output = read_fixture(&dir.join("output.html"));
+
+		assert_eq!(process(&input), output);
+	}
+
+	#[test]
+	fn parses_onoff() {
+		test_files("paragraph toggle");
+	}
+}