use core::fmt;
pub struct Html {
pub nodes: Vec,
}
impl Html {
pub fn parse>(raw: S) -> Self {
let mut raw = raw.as_ref();
let mut nodes = vec![];
loop {
let Consumed { node, remaining } = Self::parse_node(raw);
nodes.push(node);
match remaining {
None => break Self { nodes },
Some(rem) => raw = rem,
}
}
}
/// Parses a single node from the front of `raw`.
///
/// Input that `is_tag` does not recognise is handed to `parse_text`. Otherwise
/// a comment is tried first, falling back to `parse_tag` when `parse_comment`
/// yields nothing.
fn parse_node(raw: &str) -> Consumed {
    if Self::is_tag(raw).is_none() {
        return Self::parse_text(raw);
    }
    match Self::parse_comment(raw) {
        Some(comment) => comment,
        None => Self::parse_tag(raw),
    }
}
fn parse_tag(raw: &str) -> Consumed {
let (root_tag, mut rest) = Self::is_tag(raw).unwrap();
let mut tag = if root_tag.body.is_empty() {
Tag {
name: root_tag.name.to_owned(),
body: None,
children: vec![],
}
} else {
Tag {
name: root_tag.name.into(),
body: Some(root_tag.body.to_owned()),
children: vec![],
}
};
if root_tag.closing {
panic!(
"found closing tag when not expected! {:?}\n{raw}",
root_tag.name
)
} else if root_tag.self_closing {
return Consumed {
node: Node::Tag(tag),
remaining: rest,
};
}
loop {
// Special case ";
let special = Html::special_parse(basic, "script");
assert_eq!(special.unwrap().0, "words words\n");
assert!(special.unwrap().1.is_empty());
}
/// The only closing tag in the input sits in the middle of a line (inside a
/// quoted string), and `special_parse` is expected to return `None` for it.
#[test]
fn special_parse_correctly_ignore_non_start() {
    let nonstart = "first_line\nlet end = '</script>';\n";
    let special = Html::special_parse(nonstart, "script");
    assert!(special.is_none());
}
/// Whitespace that precedes the closing tag on its line is expected to be
/// returned as part of the content.
#[test]
fn special_parse_correctly_handles_leading_whitespace() {
    let white = "words words\n \t\t</script>";
    let special = Html::special_parse(white, "script");
    assert_eq!(special.unwrap().0, "words words\n \t\t");
}
/// An HTML comment is parsed into a comment node holding the text between the
/// comment delimiters.
#[test]
fn parse_node_parses_comment() {
    let cmt = "<!-- Comment! -->";
    let node = Html::parse_node(cmt);
    assert_eq!(node.node, comment!(" Comment! "));
}
/// A single element with text content becomes a tag node with one text child.
#[test]
fn parse_node_parses_tag() {
    let basic = "<p>Hello!</p>";
    let hh = Html::parse_node(basic);
    assert_eq!(hh.node, tag!("p", [text!("Hello!")]))
}
/// An element nested inside another becomes a child of the outer tag node.
#[test]
fn parse_node_parses_nested_tags() {
    let nested = "<p><p>Hello!</p></p>";
    let hh = Html::parse_node(nested);
    assert_eq!(hh.node, tag!("p", [tag!("p", [text!("Hello!")])]))
}
/// Sibling elements at the top level each become their own entry in `nodes`.
#[test]
fn parse_multiple_toplevel() {
    let siblings = "<p>Hello </p><p>World!</p>";
    let hh = Html::parse(siblings);
    assert_eq!(
        hh.nodes,
        vec![tag!("p", [text!("Hello ")]), tag!("p", [text!("World!")])]
    )
}
/// An inline script's body is kept verbatim as a single text child, including
/// the whitespace before its closing tag.
#[test]
fn parse_script() {
    let raw = "<head>\n\t<script>let k=\"v\";\n\t</script>\n</head>";
    let hh = Html::parse(raw);
    assert_eq!(
        hh.nodes,
        vec![tag!(
            "head",
            [
                text!("\n\t"),
                tag!("script", [text!("let k=\"v\";\n\t")]),
                text!("\n")
            ]
        )]
    )
}
/// A script element that only references an external file keeps its attribute
/// text as the tag body and has no children.
#[test]
fn parse_external_script() {
    let raw = "<head>\n\t<script src=\"script.js\"></script>\n</head>";
    let hh = Html::parse(raw);
    assert_eq!(
        hh.nodes,
        vec![tag!(
            "head",
            [
                text!("\n\t"),
                tag!("script", "src=\"script.js\""),
                text!("\n")
            ]
        )]
    )
}
}
/// Tests for attribute lookup on [`Tag`] via `get_attribute`.
#[cfg(test)]
mod tag_test {
    use crate::Tag;

    /// Builds a childless `Tag` named `name` whose raw attribute text is `body`.
    fn tag_with_body(name: &str, body: &str) -> Tag {
        Tag {
            name: name.into(),
            body: Some(body.into()),
            children: vec![],
        }
    }

    /// A bare attribute name without a value is still found.
    #[test]
    fn tag_finds_boolean_attribute() {
        let tag = tag_with_body("div", "contenteditable");
        assert!(tag.get_attribute("contenteditable").is_some())
    }

    /// A `key="value"` attribute yields its value without the quotes.
    #[test]
    fn tag_finds_kv_attribute() {
        let tag = tag_with_body("script", "src=\"script.js\"");
        assert_eq!(tag.get_attribute("src"), Some("script.js"))
    }

    /// A trailing self-closing slash in the body does not disturb the lookup.
    #[test]
    fn tag_finds_attribute_with_self_close() {
        let tag = tag_with_body("link", "href=\"style.css\" /");
        assert_eq!(tag.get_attribute("href"), Some("style.css"))
    }

    /// A bare attribute surrounded by `key="value"` attributes is still found.
    #[test]
    fn tag_finds_boolean_in_centre() {
        let tag = tag_with_body(
            "div",
            "id=\"divy\" contenteditable style=\"display: none;\" /",
        );
        assert!(tag.get_attribute("contenteditable").is_some());
    }
}