use core::fmt;

// NOTE(review): this copy of the file looks damaged by text extraction. The field type
// below reads `Vec,` and the method header reads `parse>(raw: S)`, so the generic
// parameter lists appear to have been stripped. Presumably the originals were a vector
// of nodes and a bound that provides `as_ref()` on `S` — TODO confirm against the real file.

/// A parsed document: the list of top-level nodes collected by [`Html::parse`].
pub struct Html {
    pub nodes: Vec,
}

impl Html {
    /// Parses `raw` into an `Html` value.
    ///
    /// Repeatedly calls `parse_node` on the not-yet-consumed input, pushing each
    /// returned node, and stops as soon as a call reports no remaining input.
    /// `parse_node` is therefore always called at least once, even for empty input.
    pub fn parse>(raw: S) -> Self {
        let mut raw = raw.as_ref();
        let mut nodes = vec![];
        loop {
            let Consumed { node, remaining } = Self::parse_node(raw);
            nodes.push(node);
            match remaining {
                // Nothing left to consume: the loop evaluates to the finished value.
                None => break Self { nodes },
                // Continue from where the previous node ended.
                Some(rem) => raw = rem,
            }
        }
    }

    /// Parses a single node from the front of `raw`.
    ///
    /// When `is_tag` recognises the input, a comment is tried first
    /// (`parse_comment`) and `parse_tag` is the fallback; otherwise the input is
    /// handed to `parse_text`.
    fn parse_node(raw: &str) -> Consumed {
        match Self::is_tag(raw) {
            Some(_) => {
                if let Some(cmt) = Self::parse_comment(raw) {
                    cmt
                } else {
                    Self::parse_tag(raw)
                }
            }
            None => {
                let cons = Self::parse_text(raw);
                cons
            }
        }
    }

    /// Parses a tag starting at the front of `raw`.
    ///
    /// # Panics
    ///
    /// Panics if `is_tag(raw)` returns `None` (the result is unwrapped), or if the
    /// tag found is a closing tag.
    fn parse_tag(raw: &str) -> Consumed {
        let (root_tag, mut rest) = Self::is_tag(raw).unwrap();
        // An empty tag body is stored as `None` rather than `Some("")`.
        let mut tag = if root_tag.body.is_empty() {
            Tag {
                name: root_tag.name.to_owned(),
                body: None,
                self_closing: root_tag.self_closing,
                children: vec![],
            }
        } else {
            Tag {
                name: root_tag.name.into(),
                body: Some(root_tag.body.to_owned()),
                self_closing: root_tag.self_closing,
                children: vec![],
            }
        };
        if root_tag.closing {
            // A closing tag is not valid where an opening tag is expected.
            panic!(
                "found closing tag when not expected! {:?}\n{raw}",
                root_tag.name
            )
        } else if root_tag.self_closing {
            // A self-closing tag has no children; return it with the rest of the input.
            return Consumed {
                node: Node::Tag(tag),
                remaining: rest,
            };
        }
        // Prints the tag name to stdout on every non-self-closing opening tag.
        println!("Looking for {}", root_tag.name);
        loop {
            // NOTE(review): the file appears truncated/garbled from this point. The rest of
            // `parse_tag`, the remaining items of this `impl`, and the start of the test
            // module are not present, and markup inside the test string literals seems to
            // have been stripped (e.g. `let cmt = ""` is compared against " Comment! ").
            // The original text below is left exactly as found — restore from the real file.
            // Special case "; let special = Html::special_parse(basic, "script"); assert_eq!(special.unwrap().0, "words words\n"); assert!(special.unwrap().1.is_empty()); } #[test] fn special_parse_correctly_ignore_non_start() { let nonstart = "first_line\nlet end = '';\n"; let special = Html::special_parse(nonstart, "script"); assert!(special.is_none()); } #[test] fn special_parse_correctly_handles_leading_whitespace() { let white = "words words\n \t\t"; let special = Html::special_parse(white, "script"); assert_eq!(special.unwrap().0, "words words\n \t\t"); } #[test] fn parse_node_parses_comment() { let cmt = ""; let node = Html::parse_node(cmt); assert_eq!(node.node, comment!(" Comment! ")); } #[test] fn parse_node_parses_tag() { let basic = "

Hello!

"; let hh = Html::parse_node(basic); assert_eq!(hh.node, tag!("p", [text!("Hello!")])) } #[test] fn parse_node_parses_nested_tags() { let nested = "

Hello!

"; let hh = Html::parse_node(nested); assert_eq!(hh.node, tag!("p", [tag!("p", [text!("Hello!")])])) } #[test] fn parse_multiple_toplevel() { let nested = "

Hello

World!

"; let hh = Html::parse(nested); assert_eq!( hh.nodes, vec![tag!("p", [text!("Hello ")]), tag!("p", [text!("World!")])] ) } #[test] fn parse_script() { let raw = "\n\t\n"; let hh = Html::parse(raw); assert_eq!( hh.nodes, vec![tag!( "head", [ text!("\n\t"), tag!("script", [text!("let k=\"v\";\n\t")]), text!("\n") ] )] ) } #[test] fn parse_external_script() { let raw = "\n\t\n"; let hh = Html::parse(raw); assert_eq!( hh.nodes, vec![tag!( "head", [ text!("\n\t"), tag!("script", "src=\"script.js\""), text!("\n") ] )] ) } fn test_roundtrip(raw: &str) { let html = Html::parse(raw); let string = html.to_string(); for (raw, html) in raw.lines().zip(string.lines()) { assert_eq!(raw, html) } } #[test] fn round_trip_simple() { test_roundtrip("

Hello!

") } #[test] fn round_trip_complex() { test_roundtrip( r#" Title!

Hello, World!

"#, ) } } #[cfg(test)] mod tag_test { use crate::Tag; #[test] fn tag_finds_boolen_attribute() { let tag = Tag { name: "div".into(), body: Some("contenteditable".into()), self_closing: false, children: vec![], }; assert!(tag.get_attribute("contenteditable").is_some()) } #[test] fn tag_finds_kv_attribute() { let tag = Tag { name: "script".into(), body: Some("src=\"script.js\"".into()), self_closing: false, children: vec![], }; assert_eq!(tag.get_attribute("src"), Some("script.js")) } #[test] fn tag_finds_boolean_in_centre() { let tag = Tag { name: "div".into(), body: Some("id=\"divy\" contenteditable style=\"display: none;\"".into()), self_closing: false, children: vec![], }; assert!(tag.get_attribute("contenteditable").is_some()); } }