about summary refs log tree commit diff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r--src/lib.rs102
1 files changed, 81 insertions, 21 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 31e8a17..36a9075 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -60,16 +60,36 @@ impl Html {
 		let mut children = vec![];
 
 		loop {
-			match Self::is_tag(rest.unwrap()) {
-				Some((
-					ParsedTag {
-						closing: true,
-						name,
-						self_closing: false,
-						..
-					},
-					remaining,
-				)) if name == root_tag.name => {
+			// Special case <script> and <style>
+			if root_tag.name == "script" || root_tag.name == "style" {
+				let special = Self::special_parse(rest.unwrap(), root_tag.name);
+
+				match special {
+					None => {
+						panic!("found tag '{}' with no end", root_tag.name);
+					}
+					Some((text, remaining)) => {
+						let remaining = if remaining.is_empty() {
+							None
+						} else {
+							Some(remaining)
+						};
+
+						return Consumed {
+							node: Node::Tag {
+								self_closing: false,
+								name: root_tag.name.into(),
+								children: vec![text!(text)],
+							},
+							remaining,
+						};
+					}
+				}
+			}
+
+			// Find the closing end of our root_tag
+			if let Some((parsed, remaining)) = Self::is_tag(rest.unwrap()) {
+				if parsed.closing && parsed.name == root_tag.name {
 					break Consumed {
 						node: Node::Tag {
 							self_closing: false,
@@ -79,13 +99,38 @@ impl Html {
 						remaining,
 					};
 				}
-				_ => {
-					let cons = Self::parse_node(rest.unwrap());
-					rest = cons.remaining;
-					children.push(cons.node);
+			}
+
+			// Not our closing tag? Parse the next node and push it as a child
+			let cons = Self::parse_node(rest.unwrap());
+			rest = cons.remaining;
+			children.push(cons.node);
+		}
+	}
+
+	fn special_parse<'a>(mut raw: &'a str, looking_for_name: &str) -> Option<(&'a str, &'a str)> {
+		let close = format!("</{looking_for_name}>");
+
+		let mut offset = 0;
+		loop {
+			match raw[offset..].find('\n') {
+				None => return None,
+				Some(nl_idx) => {
+					offset += nl_idx + 1;
+					match raw[offset..].find(|c: char| !c.is_ascii_whitespace()) {
+						None => return None,
+						Some(whole_idx) => {
+							let whole_start = &raw[offset + whole_idx..];
+							if let Some(stripped) = whole_start.strip_prefix(&close) {
+								return Some((&raw[..offset + whole_idx], stripped));
+							}
+						}
+					}
 				}
 			}
 		}
+
+		None
 	}
 
 	fn parse_comment(raw: &str) -> Option<Consumed> {
@@ -212,13 +257,6 @@ struct ParsedTag<'a> {
 	self_closing: bool,
 }
 
-impl<'a> ParsedTag<'a> {
-	/// Whether or not this tag closes or self-closes
-	pub fn closes(&self) -> bool {
-		self.closing || self.self_closing
-	}
-}
-
 #[derive(Debug, PartialEq)]
 pub enum Node {
 	Text(String),
@@ -294,6 +332,28 @@ mod test {
 	}
 
 	#[test]
+	fn special_parse_find_tag_end() {
+		let basic = "words words\n</script>";
+		let special = Html::special_parse(basic, "script");
+		assert_eq!(special.unwrap().0, "words words\n");
+		assert!(special.unwrap().1.is_empty());
+	}
+
+	#[test]
+	fn special_parse_correctly_ignore_non_start() {
+		let nonstart = "first_line\nlet end = '</script>';\n";
+		let special = Html::special_parse(nonstart, "script");
+		assert!(special.is_none());
+	}
+
+	#[test]
+	fn special_parse_correctly_handles_leading_whitespace() {
+		let white = "words words\n  \t\t</script>";
+		let special = Html::special_parse(white, "script");
+		assert_eq!(special.unwrap().0, "words words\n  \t\t");
+	}
+
+	#[test]
 	fn parse_node_parses_comment() {
 		let cmt = "<!-- Comment! -->";