diff options
author | gennyble <gen@nyble.dev> | 2023-12-27 15:13:52 -0600 |
---|---|---|
committer | gennyble <gen@nyble.dev> | 2023-12-27 15:13:52 -0600 |
commit | 848b70d0cf67ccbc4634b4f28ada285d6691eecd (patch) | |
tree | f94206ae534be7c43e5154bbe9902fc87833cd38 /src/lib.rs | |
parent | f4e186c2946c40ffbc03dfaf383e067333fc0852 (diff) | |
download | cutie-848b70d0cf67ccbc4634b4f28ada285d6691eecd.tar.gz cutie-848b70d0cf67ccbc4634b4f28ada285d6691eecd.zip |
actually special-case script/style?
I thought this was the last commit, honestly?
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- | src/lib.rs | 102 |
1 files changed, 81 insertions, 21 deletions
diff --git a/src/lib.rs b/src/lib.rs index 31e8a17..36a9075 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,16 +60,36 @@ impl Html { let mut children = vec![]; loop { - match Self::is_tag(rest.unwrap()) { - Some(( - ParsedTag { - closing: true, - name, - self_closing: false, - .. - }, - remaining, - )) if name == root_tag.name => { + // Special case <script> and <style> + if root_tag.name == "script" || root_tag.name == "style" { + let special = Self::special_parse(rest.unwrap(), root_tag.name); + + match special { + None => { + panic!("found tag '{}' with no end", root_tag.name); + } + Some((text, remaining)) => { + let remaining = if remaining.is_empty() { + None + } else { + Some(remaining) + }; + + return Consumed { + node: Node::Tag { + self_closing: false, + name: root_tag.name.into(), + children: vec![text!(text)], + }, + remaining, + }; + } + } + } + + // Find the closing end of out root_tag + if let Some((parsed, remaining)) = Self::is_tag(rest.unwrap()) { + if parsed.closing && parsed.name == root_tag.name { break Consumed { node: Node::Tag { self_closing: false, @@ -79,13 +99,38 @@ impl Html { remaining, }; } - _ => { - let cons = Self::parse_node(rest.unwrap()); - rest = cons.remaining; - children.push(cons.node); + } + + // Not our closing root? parse and push + let cons = Self::parse_node(rest.unwrap()); + rest = cons.remaining; + children.push(cons.node); + } + } + + fn special_parse<'a>(mut raw: &'a str, looking_for_name: &str) -> Option<(&'a str, &'a str)> { + let close = format!("</{looking_for_name}>"); + + let mut offset = 0; + loop { + match raw[offset..].find('\n') { + None => return None, + Some(nl_idx) => { + offset += nl_idx + 1; + match raw[offset..].find(|c: char| !c.is_ascii_whitespace()) { + None => return None, + Some(whole_idx) => { + let whole_start = &raw[offset + whole_idx..]; + if let Some(stripped) = whole_start.strip_prefix(&close) { + return Some((&raw[..offset + whole_idx], stripped)); + } + } + } } } } + + None } fn parse_comment(raw: &str) -> Option<Consumed> { @@ -212,13 +257,6 @@ struct ParsedTag<'a> { self_closing: bool, } -impl<'a> ParsedTag<'a> { - /// Whether or not this tag closes or self-closes - pub fn closes(&self) -> bool { - self.closing || self.self_closing - } -} - #[derive(Debug, PartialEq)] pub enum Node { Text(String), @@ -294,6 +332,28 @@ mod test { } #[test] + fn special_parse_find_tag_end() { + let basic = "words words\n</script>"; + let special = Html::special_parse(basic, "script"); + assert_eq!(special.unwrap().0, "words words\n"); + assert!(special.unwrap().1.is_empty()); + } + + #[test] + fn special_parse_correctly_ignore_non_start() { + let nonstart = "first_line\nlet end = '</script>';\n"; + let special = Html::special_parse(nonstart, "script"); + assert!(special.is_none()); + } + + #[test] + fn special_parse_correctly_handles_leading_whitespace() { + let white = "words words\n \t\t</script>"; + let special = Html::special_parse(white, "script"); + assert_eq!(special.unwrap().0, "words words\n \t\t"); + } + + #[test] fn parse_node_parses_comment() { let cmt = "<!-- Comment! -->"; |