diff options
Diffstat (limited to 'src/markup.rs')
-rw-r--r-- | src/markup.rs | 218 |
1 files changed, 218 insertions, 0 deletions
diff --git a/src/markup.rs b/src/markup.rs new file mode 100644 index 0000000..4e0d66e --- /dev/null +++ b/src/markup.rs @@ -0,0 +1,218 @@ +struct State { + active_id: Option<String>, + paragraphs: bool, + + processed: String, + current: String, + escaped_html: bool, + last_blank: bool, +} + +impl Default for State { + fn default() -> Self { + Self { + active_id: None, + paragraphs: true, + + processed: String::new(), + current: String::new(), + escaped_html: false, + last_blank: true, + } + } +} + +impl State { + /// Get an opening paragraph tag with any attributes currently set. + fn get_open_paragraph(&mut self) -> String { + match self.active_id.take() { + None => String::from("<p>"), + Some(id) => format!(r#"<p id="{id}">"#), + } + } + + pub fn process_line(&mut self, line: &str) { + // we check !paragraphs here because we need to be able to enable it again + // and the easiest way right now seems to be to try to parse every + // non-paragraph line as a command + if (self.last_blank || !self.paragraphs) && self.parse_command(line) { + // don't set last_blank here. we want to be able to chain commands + return; + } + + if !self.paragraphs || !line.is_empty() { + if !self.current.is_empty() { + self.current.push('\n'); + } + + let escaped = self.escape_line(line); + self.current.push_str(escaped); + } else { + // line is empty. + self.push_current(); + } + + self.last_blank = false; + } + + pub fn done(mut self) -> String { + self.push_current(); + self.processed + } + + fn escape_line<'a>(&mut self, line: &'a str) -> &'a str { + if let Some(strip) = line.strip_prefix('\\') { + match line.chars().next() { + Some('[') => strip, + Some('<') => { + if self.last_blank { + self.escaped_html = true; + } + + strip + } + Some('\\') => strip, + _ => line, + } + } else { + line + } + } + + /// Possibly parses a line as a command and mutates internal state. + /// # Returns + /// true if the line was a command, false otherwise. + fn parse_command(&mut self, line: &str) -> bool { + match line.strip_prefix('[') { + Some(line) => match line.strip_suffix(']') { + Some(cmd) => self.run_command(cmd), + None => false, + }, + None => false, + } + } + + fn run_command(&mut self, cmd: &str) -> bool { + match cmd.trim() { + "@paragraphs off" => { + self.push_current(); + self.paragraphs = false; + true + } + "@paragraphs on" => { + self.push_current(); + self.paragraphs = true; + true + } + annotation if cmd.starts_with('#') => { + self.active_id = Some(annotation[1..].to_owned()); + true + } + _ => false, + } + } + + fn push_current(&mut self) { + if !self.current.is_empty() { + // linebreak if there is already text pushed to final + if !self.processed.is_empty() { + self.processed.push('\n'); + } + + // wrap paragraphs if all of these are true: + // - we're supposed to be wrapping paragraphs + // - either of these is true: + // - the line does not start with < + // OR + // - the line starts with < AND it's been escaped + let should_paragraph = self.paragraphs + && (!self.current.starts_with('<') + || (self.current.starts_with('<') && self.escaped_html)); + + if should_paragraph { + let open = self.get_open_paragraph(); + self.processed + .push_str(&format!("{open}\n{}\n</p>", self.current)); + } else { + self.processed.push_str(&self.current); + } + + // reset block dependant state + self.current.clear(); + self.last_blank = true; + self.escaped_html = false; + } + } +} + +pub fn process(raw: &str) -> String { + let mut state = State::default(); + + for line in raw.lines() { + state.process_line(line) + } + + state.done() +} + +#[cfg(test)] +mod test { + use camino::Utf8PathBuf; + + use crate::markup::process; + + #[test] + fn parses_no_commands() { + let blk1 = "line one\nline two"; + let blk2 = "block two"; + + assert_eq!(process(blk1), format!("<p>\n{blk1}\n</p>")); + + let tst = format!("{blk1}\n\n{blk2}"); + assert_eq!( + process(&tst), + format!("<p>\n{blk1}\n</p>\n<p>\n{blk2}\n</p>") + ) + } + + #[test] + fn parses_paragraph_off() { + let str = "[@paragraphs off]\none two\n\nthree\nfour"; + assert_eq!(process(str), "one two\n\nthree\nfour") + } + + #[test] + fn parses_adds_annotation() { + let str = "[#greeting]\nHello!"; + assert_eq!(process(str), "<p id=\"greeting\">\nHello!\n</p>") + } + + #[test] + fn doesnt_wrap_html() { + let str = "hello!\n\n<i>hi, how are you?</i>"; + assert_eq!(process(str), "<p>\nhello!\n</p>\n<i>hi, how are you?</i>") + } + + #[test] + fn correctly_escapes() { + let str = "\\[@paragraph on]\n\\<i>Hello!</i>\n\\\\Goodbye!"; + let correct = "<p>\n[@paragraph on]\n<i>Hello!</i>\n\\Goodbye!\n</p>"; + assert_eq!(process(str), correct) + } + + const BASE: &str = "test/markup"; + fn test_files(test: &str) { + let input_path = format!("{BASE}/{test}/input.html"); + let output_path = format!("{BASE}/{test}/output.html"); + + let input = std::fs::read_to_string(input_path).unwrap(); + let output = std::fs::read_to_string(output_path).unwrap(); + + assert_eq!(process(&input), output) + } + + #[test] + fn parses_onoff() { + test_files("paragraph toggle") + } +} |