use core::fmt;

// NOTE(review): this copy of the file looks like it lost every angle-bracketed span: the element
// type of `nodes` (`Vec` with no parameter) and the generic parameter list of `parse` (`parse>`)
// are missing. Restore both from the original file before building.
/// A parsed HTML document: the nodes collected by [`Html::parse`].
pub struct Html {
    /// Nodes in the order `parse` pushed them.
    pub nodes: Vec,
}

impl Html {
    /// Parses `raw` into an [`Html`] by calling `parse_node` repeatedly.
    ///
    /// Each call yields one node plus the unconsumed remainder; the loop ends when the
    /// remainder is `None`. The node is pushed before the remainder is inspected, so
    /// `parse_node` runs at least once, whatever the input.
    pub fn parse>(raw: S) -> Self {
        let mut raw = raw.as_ref();
        let mut nodes = vec![];
        loop {
            let Consumed { node, remaining } = Self::parse_node(raw);
            nodes.push(node);
            match remaining {
                // Nothing left to consume: the loop evaluates to the finished document.
                None => break Self { nodes },
                Some(rem) => raw = rem,
            }
        }
    }

    /// Parses a single node from the front of `raw`.
    ///
    /// When `is_tag` matches, a comment is tried first (`parse_comment`) and `parse_tag` is
    /// the fallback; otherwise the input is handed to `parse_text`.
    fn parse_node(raw: &str) -> Consumed {
        match Self::is_tag(raw) {
            Some(_) => {
                if let Some(cmt) = Self::parse_comment(raw) {
                    cmt
                } else {
                    Self::parse_tag(raw)
                }
            }
            None => {
                let cons = Self::parse_text(raw);
                cons
            }
        }
    }

    /// Parses a tag that starts at the front of `raw`.
    ///
    /// A self-closing tag is returned immediately with no children.
    ///
    /// # Panics
    ///
    /// Panics if `is_tag(raw)` returns `None` (the `unwrap` below), or if the tag found is a
    /// closing tag.
    fn parse_tag(raw: &str) -> Consumed {
        let (root_tag, mut rest) = Self::is_tag(raw).unwrap();
        // An empty tag body is stored as `None` rather than as an empty string.
        let mut tag = if root_tag.body.is_empty() {
            Tag {
                name: root_tag.name.to_owned(),
                body: None,
                children: vec![],
            }
        } else {
            Tag {
                name: root_tag.name.into(),
                body: Some(root_tag.body.to_owned()),
                children: vec![],
            }
        };
        if root_tag.closing {
            panic!(
                "found closing tag when not expected! {:?}\n{raw}",
                root_tag.name
            )
        } else if root_tag.self_closing {
            return Consumed {
                node: Node::Tag(tag),
                remaining: rest,
            };
        }
        // NOTE(review): the rest of this function is not present in this copy. The text after
        // the `//` on the next line appears to be the remains of a later test module fused onto
        // the comment (presumably by the same markup stripping); it is kept verbatim and must be
        // restored from the original file.
        loop { // Special case "; let special = Html::special_parse(basic, "script"); assert_eq!(special.unwrap().0, "words words\n"); assert!(special.unwrap().1.is_empty()); } #[test] fn special_parse_correctly_ignore_non_start() { let nonstart = "first_line\nlet end = '';\n"; let special = Html::special_parse(nonstart, "script"); assert!(special.is_none()); } #[test] fn special_parse_correctly_handles_leading_whitespace() { let white = "words words\n \t\t"; let special = Html::special_parse(white, "script"); assert_eq!(special.unwrap().0, "words words\n \t\t"); } #[test] fn parse_node_parses_comment() { let cmt = ""; let node = Html::parse_node(cmt); assert_eq!(node.node, comment!(" Comment! ")); } #[test] fn parse_node_parses_tag() { let basic = "

Hello!

"; let hh = Html::parse_node(basic); assert_eq!(hh.node, tag!("p", [text!("Hello!")])) } #[test] fn parse_node_parses_nested_tags() { let nested = "

Hello!

"; let hh = Html::parse_node(nested); assert_eq!(hh.node, tag!("p", [tag!("p", [text!("Hello!")])])) } #[test] fn parse_multiple_toplevel() { let nested = "

Hello

World!

"; let hh = Html::parse(nested); assert_eq!( hh.nodes, vec![tag!("p", [text!("Hello ")]), tag!("p", [text!("World!")])] ) } #[test] fn parse_script() { let raw = "\n\t\n"; let hh = Html::parse(raw); assert_eq!( hh.nodes, vec![tag!( "head", [ text!("\n\t"), tag!("script", [text!("let k=\"v\";\n\t")]), text!("\n") ] )] ) }
    // NOTE(review): the line above is kept verbatim because it begins inside a string literal
    // opened on an earlier line. In `parse_script`, and in `parse_external_script` below, `raw`
    // holds only whitespace escapes while the expected tree contains `head` and `script` tags,
    // so the markup inside these literals appears to have been stripped from this copy.
    // Restore the literals from the original file before relying on these tests.

    /// Expects a `script` tag with a `src` attribute and no children, nested in `head`
    /// between two whitespace text nodes.
    #[test]
    fn parse_external_script() {
        let raw = "\n\t\n";
        let hh = Html::parse(raw);
        assert_eq!(
            hh.nodes,
            vec![tag!(
                "head",
                [
                    text!("\n\t"),
                    tag!("script", "src=\"script.js\""),
                    text!("\n")
                ]
            )]
        )
    }
}

/// Tests for `Tag::get_attribute`, run against hand-built `Tag` values (no parsing involved).
#[cfg(test)]
mod tag_test {
    use crate::Tag;

    /// A body made of a single bare attribute name is found as an attribute.
    #[test]
    fn tag_finds_boolen_attribute() {
        let tag = Tag {
            name: "div".into(),
            body: Some("contenteditable".into()),
            children: vec![],
        };
        assert!(tag.get_attribute("contenteditable").is_some())
    }

    /// A `key="value"` attribute yields the value without its surrounding quotes.
    #[test]
    fn tag_finds_kv_attribute() {
        let tag = Tag {
            name: "script".into(),
            body: Some("src=\"script.js\"".into()),
            children: vec![],
        };
        assert_eq!(tag.get_attribute("src"), Some("script.js"))
    }

    /// A trailing ` /` in the body does not end up in the attribute value.
    #[test]
    fn tag_finds_attribute_with_self_close() {
        let tag = Tag {
            name: "link".into(),
            body: Some("href=\"style.css\" /".into()),
            children: vec![],
        };
        assert_eq!(tag.get_attribute("href"), Some("style.css"))
    }

    /// A bare attribute is still found when it sits between two `key="value"` attributes.
    #[test]
    fn tag_finds_boolean_in_centre() {
        let tag = Tag {
            name: "div".into(),
            body: Some("id=\"divy\" contenteditable style=\"display: none;\" /".into()),
            children: vec![],
        };
        assert!(tag.get_attribute("contenteditable").is_some());
    }
}