diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lib.rs | 188 |
1 files changed, 145 insertions, 43 deletions
diff --git a/src/lib.rs b/src/lib.rs index 26ec5dd..f1010ae 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,12 +43,14 @@ impl Html { Tag { name: root_tag.name.to_owned(), body: None, + self_closing: root_tag.self_closing, children: vec![], } } else { Tag { name: root_tag.name.into(), body: Some(root_tag.body.to_owned()), + self_closing: root_tag.self_closing, children: vec![], } }; @@ -65,11 +67,14 @@ impl Html { }; } + println!("Looking for {}", root_tag.name); + loop { // Special case <script> and <style> if root_tag.name == "script" && tag.get_attribute("src").is_none() || root_tag.name == "style" { + println!("\tparse special"); let special = Self::special_parse(rest.unwrap(), root_tag.name); match special { @@ -95,6 +100,7 @@ impl Html { // Find the closing end of out root_tag if let Some((parsed, remaining)) = Self::is_tag(rest.unwrap()) { if parsed.closing && parsed.name == root_tag.name { + println!("\tclosed {}", parsed.name); break Consumed { node: Node::Tag(tag), remaining, @@ -165,43 +171,64 @@ impl Html { }; let tag_innards = &raw[1..idx]; - let close = tag_innards.starts_with('/'); - let self_close_idx = { - let possible = &tag_innards[1..]; - let last_whole_char = possible.rfind(|c: char| !c.is_ascii_whitespace()).map(|n| n + 1); - let last_slash = possible.rfind('/').map(|n| n + 1); - match (last_slash, last_whole_char) { - (Some(slash), Some(whole)) if slash == whole => { - Some(slash) - }, - _ => None + // Tag is closing. Don't check for self-close + // as they cannot be on the same tag + if let Some(closing) = tag_innards.strip_prefix('/') { + match closing.find(' ') { + None => return Some((ParsedTag { + closing: true, + self_closing: false, + name: closing, + body: "" + }, rest)), + Some(idx) => { + let name = &closing[..idx]; + let body = &closing[idx..]; + + return Some((ParsedTag{ + closing: true, + self_closing: false, + name, + body + }, rest)) + } } - }; - let self_close = self_close_idx.is_some(); - // can't close and self_close - if close && self_close { return None; } - - // clean the close from the raw string - let name_raw = if let Some(close_idx) = self_close_idx { - &tag_innards[..close_idx] - } else if close { - &tag_innards[1..] - } else { - tag_innards - }; + } - let (name, body) = match name_raw.find(|c: char| c.is_ascii_whitespace()){ + if let Some(closing) = tag_innards.strip_suffix('/') { + match closing.find(' ') { + None => return Some((ParsedTag { + closing: false, + self_closing: true, + name: closing, + body: "" + }, rest)), + Some(idx) => { + let name = &closing[..idx]; + let body = &closing[idx+1..]; + + return Some((ParsedTag{ + closing: false, + self_closing: true, + name, + body + }, rest)) + } + } + } + + let (name, body) = match tag_innards.find(' '){ None => { - (name_raw, "") + (tag_innards, "") }, Some(idx) => { - (&name_raw[..idx], &name_raw[idx+1..]) + (&tag_innards[..idx], &tag_innards[idx+1..]) } }; Some((ParsedTag{ - closing: close, - self_closing: self_close, + closing: false, + self_closing: false, name, body }, rest)) @@ -242,6 +269,15 @@ impl Html { } } +impl fmt::Display for Html { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for node in &self.nodes { + write!(f, "{node}")?; + } + Ok(()) + } +} + struct Consumed<'a> { node: Node, remaining: Option<&'a str>, @@ -263,21 +299,29 @@ pub enum Node { Comment(String), } +impl fmt::Display for Node { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Text(txt) => write!(f, "{txt}"), + Self::Tag(tag) => write!(f, "{tag}"), + Self::Comment(cmt) => write!(f, "<!--{cmt}-->"), + } + } +} + #[derive(Clone, Debug, PartialEq)] pub struct Tag { pub name: String, /// Everything inside the tag that's not it's name. Includes a /// self-close if there is one. pub body: Option<String>, + pub self_closing: bool, pub children: Vec<Node>, } impl Tag { pub fn self_closing(&self) -> bool { - self.body - .as_deref() - .map(|s| s.trim_end().ends_with('/')) - .unwrap_or(false) + self.self_closing } pub fn get_attribute<'a>(&'a self, key: &str) -> Option<&'a str> { @@ -340,12 +384,41 @@ impl Tag { } } +impl fmt::Display for Tag { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let Tag { + name, + body, + self_closing, + children, + } = self; + + let formatted_body = if let Some(body) = body { + format!(" {body}") + } else { + String::from("") + }; + + if *self_closing { + // we ignore our children if we're self-closing. + write!(f, "<{name} {}/>", body.as_deref().unwrap_or_default()) + } else { + write!(f, "<{name}{formatted_body}>")?; + for child in children { + write!(f, "{}", child)?; + } + write!(f, "</{name}>") + } + } +} + #[macro_export] macro_rules! tag { ($name:expr) => { $crate::Node::Tag($crate::Tag { name: String::from($name), body: None, + self_closing: false, children: vec![], }) }; @@ -354,6 +427,7 @@ macro_rules! tag { $crate::Node::Tag($crate::Tag { name: String::from($name), body: None, + self_closing: false, children: vec![$($children),+], }) }; @@ -362,6 +436,7 @@ macro_rules! tag { $crate::Node::Tag($crate::Tag { name: String::from($name), body: Some(String::from($body)), + self_closing: false, children: vec![], }) }; @@ -370,6 +445,7 @@ macro_rules! tag { $crate::Node::Tag($crate::Tag { name: String::from($name), body: Some(String::from($body)), + self_closing: false, children: vec![$($children),+], }) }; @@ -529,6 +605,39 @@ mod test { )] ) } + + fn test_roundtrip(raw: &str) { + let html = Html::parse(raw); + let string = html.to_string(); + + for (raw, html) in raw.lines().zip(string.lines()) { + assert_eq!(raw, html) + } + } + + #[test] + fn round_trip_simple() { + test_roundtrip("<p>Hello!</p>") + } + + #[test] + fn round_trip_complex() { + test_roundtrip( + r#" +<html> + <head> + <link rel="style.css"/> + <title>Title!</title> + <script> + alert("hello!"); + </script> + </head> + <body> + <p>Hello, <i>World!</i></p> + </body> +</html>"#, + ) + } } #[cfg(test)] @@ -540,6 +649,7 @@ mod tag_test { let tag = Tag { name: "div".into(), body: Some("contenteditable".into()), + self_closing: false, children: vec![], }; assert!(tag.get_attribute("contenteditable").is_some()) @@ -550,26 +660,18 @@ mod tag_test { let tag = Tag { name: "script".into(), body: Some("src=\"script.js\"".into()), + self_closing: false, children: vec![], }; assert_eq!(tag.get_attribute("src"), Some("script.js")) } #[test] - fn tag_finds_attribute_with_self_close() { - let tag = Tag { - name: "link".into(), - body: Some("href=\"style.css\" /".into()), - children: vec![], - }; - assert_eq!(tag.get_attribute("href"), Some("style.css")) - } - - #[test] fn tag_finds_boolean_in_centre() { let tag = Tag { name: "div".into(), - body: Some("id=\"divy\" contenteditable style=\"display: none;\" /".into()), + body: Some("id=\"divy\" contenteditable style=\"display: none;\"".into()), + self_closing: false, children: vec![], }; assert!(tag.get_attribute("contenteditable").is_some()); |