From 4b4689ca0d086e11de52cbbf848fa0873acd7274 Mon Sep 17 00:00:00 2001 From: gennyble Date: Wed, 27 Dec 2023 17:18:12 -0600 Subject: rountrips! --- readme.md | 2 + src/lib.rs | 188 +++++++++++++++++++++++++++++++++++++++------------ tests/nyble_pages.rs | 22 ++++++ 3 files changed, 169 insertions(+), 43 deletions(-) diff --git a/readme.md b/readme.md index 725a71a..d0fc3b9 100644 --- a/readme.md +++ b/readme.md @@ -1,6 +1,8 @@ opinionated, standards non-compliant HTML parser meant to consume handwritten HTML. - all tags must close *(even `
`, ``, ``)* +- tag names must be separated from the tag-body *(where the attributes go)* by a space character *(` `, 0x20)* +- self-closing tags must have the closing `/` at the very end of the body *(directly before the `>`)* - no > in tags except at the end (not even in attributes) - inline ` + + +

Hello, World!

+ +"#, + ) + } } #[cfg(test)] @@ -540,6 +649,7 @@ mod tag_test { let tag = Tag { name: "div".into(), body: Some("contenteditable".into()), + self_closing: false, children: vec![], }; assert!(tag.get_attribute("contenteditable").is_some()) @@ -550,26 +660,18 @@ mod tag_test { let tag = Tag { name: "script".into(), body: Some("src=\"script.js\"".into()), + self_closing: false, children: vec![], }; assert_eq!(tag.get_attribute("src"), Some("script.js")) } - #[test] - fn tag_finds_attribute_with_self_close() { - let tag = Tag { - name: "link".into(), - body: Some("href=\"style.css\" /".into()), - children: vec![], - }; - assert_eq!(tag.get_attribute("href"), Some("style.css")) - } - #[test] fn tag_finds_boolean_in_centre() { let tag = Tag { name: "div".into(), - body: Some("id=\"divy\" contenteditable style=\"display: none;\" /".into()), + body: Some("id=\"divy\" contenteditable style=\"display: none;\"".into()), + self_closing: false, children: vec![], }; assert!(tag.get_attribute("contenteditable").is_some()); diff --git a/tests/nyble_pages.rs b/tests/nyble_pages.rs index 14ca1cc..2c99bc2 100644 --- a/tests/nyble_pages.rs +++ b/tests/nyble_pages.rs @@ -6,8 +6,30 @@ fn pares_homepage() { let html = Html::parse(&str); } +fn test_roundtrip(raw: &str) { + let html = Html::parse(raw); + let string = html.to_string(); + for (ln, (raw, round)) in raw.lines().zip(string.lines()).enumerate() { + if raw != round { + panic!("line {ln} differs!\n\traw: {raw}\n\ttrip: {round}") + } + } +} + +#[test] +fn homepage_roundtrip() { + let str = std::fs::read_to_string("tests/nyble.html").unwrap(); + test_roundtrip(&str); +} + #[test] fn parses_grass() { let str = std::fs::read_to_string("tests/touching_grass.html").unwrap(); let html = Html::parse(&str); } + +#[test] +fn grass_roundtrip() { + let str = std::fs::read_to_string("tests/touching_grass.html").unwrap(); + test_roundtrip(&str); +} -- cgit 1.4.1-3-g733a5