about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock58
-rw-r--r--Cargo.toml1
-rw-r--r--src/lib.rs149
-rw-r--r--src/query.rs185
-rw-r--r--src/tag.rs177
5 files changed, 429 insertions, 141 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 0006c5b..5aa59d6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5,3 +5,61 @@ version = 3
 [[package]]
 name = "cutie"
 version = "0.1.0"
+dependencies = [
+ "thiserror",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.71"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.43"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "83a48fd946b02c0a526b2e9481c8e2a17755e47039164a86c4070446e3a4614d"
+dependencies = [
+ "thiserror-impl",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7fbe9b594d6568a6a1443250a7e67d80b74e1e96f6d1715e1e21cc1888291d3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
diff --git a/Cargo.toml b/Cargo.toml
index f8822b4..36541e3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,3 +7,4 @@ license = "ISC"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+thiserror = "1.0.52"
diff --git a/src/lib.rs b/src/lib.rs
index f1010ae..df7eead 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,8 @@
 use core::fmt;
+use tag::Tag;
+
+mod query;
+mod tag;
 
 pub struct Html {
 	pub nodes: Vec<Node>,
@@ -21,6 +25,10 @@ impl Html {
 		}
 	}
 
+	pub fn query<S: AsRef<str>>(&self, query: S) -> Option<&Node> {
+		todo!()
+	}
+
 	fn parse_node(raw: &str) -> Consumed {
 		match Self::is_tag(raw) {
 			Some(_) => {
@@ -309,109 +317,6 @@ impl fmt::Display for Node {
 	}
 }
 
-#[derive(Clone, Debug, PartialEq)]
-pub struct Tag {
-	pub name: String,
-	/// Everything inside the tag that's not it's name. Includes a
-	/// self-close if there is one.
-	pub body: Option<String>,
-	pub self_closing: bool,
-	pub children: Vec<Node>,
-}
-
-impl Tag {
-	pub fn self_closing(&self) -> bool {
-		self.self_closing
-	}
-
-	pub fn get_attribute<'a>(&'a self, key: &str) -> Option<&'a str> {
-		let body = match self.body.as_deref() {
-			None => return None,
-			Some(body) => body,
-		};
-
-		// get rid of potential self-close
-		let trimmed = if let Some(suffix) = body.trim().strip_suffix('/') {
-			suffix
-		} else {
-			body.trim()
-		};
-
-		let mut wrk = trimmed;
-		loop {
-			let key_end_idx = wrk.find(|c: char| c == ' ' || c == '=');
-
-			match key_end_idx {
-				None => {
-					// boolean ends body
-					if wrk == key {
-						return Some("");
-					} else {
-						break;
-					}
-				}
-				Some(idx) => match &wrk[idx..idx + 1] {
-					" " => {
-						// boolean
-						if &wrk[..idx] == key {
-							return Some("");
-						} else {
-							wrk = &wrk[idx + 1..];
-						}
-					}
-					"=" => {
-						// key-value
-						let found_name = &wrk[..idx];
-
-						// we're just assuming the attributes are properly
-						// formed right now. Skips the `=` and the `"` that
-						// should be there but we don't check for
-						wrk = &wrk[idx + 2..];
-						let end = wrk.find('"').unwrap();
-						let value = &wrk[..end];
-						wrk = &wrk[end + 1..].trim_start();
-
-						if found_name == key {
-							return Some(value);
-						}
-					}
-					_ => unreachable!(),
-				},
-			}
-		}
-
-		None
-	}
-}
-
-impl fmt::Display for Tag {
-	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-		let Tag {
-			name,
-			body,
-			self_closing,
-			children,
-		} = self;
-
-		let formatted_body = if let Some(body) = body {
-			format!(" {body}")
-		} else {
-			String::from("")
-		};
-
-		if *self_closing {
-			// we ignore our children if we're self-closing.
-			write!(f, "<{name} {}/>", body.as_deref().unwrap_or_default())
-		} else {
-			write!(f, "<{name}{formatted_body}>")?;
-			for child in children {
-				write!(f, "{}", child)?;
-			}
-			write!(f, "</{name}>")
-		}
-	}
-}
-
 #[macro_export]
 macro_rules! tag {
 	($name:expr) => {
@@ -639,41 +544,3 @@ mod test {
 		)
 	}
 }
-
-#[cfg(test)]
-mod tag_test {
-	use crate::Tag;
-
-	#[test]
-	fn tag_finds_boolen_attribute() {
-		let tag = Tag {
-			name: "div".into(),
-			body: Some("contenteditable".into()),
-			self_closing: false,
-			children: vec![],
-		};
-		assert!(tag.get_attribute("contenteditable").is_some())
-	}
-
-	#[test]
-	fn tag_finds_kv_attribute() {
-		let tag = Tag {
-			name: "script".into(),
-			body: Some("src=\"script.js\"".into()),
-			self_closing: false,
-			children: vec![],
-		};
-		assert_eq!(tag.get_attribute("src"), Some("script.js"))
-	}
-
-	#[test]
-	fn tag_finds_boolean_in_centre() {
-		let tag = Tag {
-			name: "div".into(),
-			body: Some("id=\"divy\" contenteditable style=\"display: none;\"".into()),
-			self_closing: false,
-			children: vec![],
-		};
-		assert!(tag.get_attribute("contenteditable").is_some());
-	}
-}
diff --git a/src/query.rs b/src/query.rs
new file mode 100644
index 0000000..796e372
--- /dev/null
+++ b/src/query.rs
@@ -0,0 +1,185 @@
+#[derive(Clone, Debug, PartialEq)]
+pub enum QueryComponent {
+	/// Every child element of the tag
+	TagName(String),
+	/// Only direct children with the tag
+	DirectTagName(String),
+	/// The child element with the ID
+	Id(String),
+	/// Only the direct child with the ID
+	DirectId(String),
+	/// Every child that has the class
+	Class(String),
+	/// Only direct children with the class
+	DirectClass(String),
+}
+
+pub fn parse_query(mut raw: &str) -> Result<Vec<QueryComponent>, QueryParseError> {
+	let mut components = vec![];
+
+	let mut next_direct = false;
+	loop {
+		if raw.is_empty() {
+			break Ok(components);
+		}
+
+		let part = match raw.find(['>', ' ']) {
+			None => {
+				let part = raw;
+				raw = &raw[raw.len()..raw.len()];
+				part
+			}
+			Some(idx) => {
+				let part = &raw[..idx];
+
+				if &raw[idx..idx + 1] == ">" {
+					if next_direct {
+						return Err(QueryParseError::DoubleDirect);
+					} else {
+						next_direct = true;
+					}
+				}
+
+				raw = &raw[idx + 1..];
+				part
+			}
+		};
+
+		if part.is_empty() {
+			continue;
+		}
+
+		if let Some(id) = part.strip_prefix('#') {
+			if id.contains(['#', '.']) {
+				return Err(QueryParseError::UnknownComponent {
+					malformed: id.into(),
+				});
+			}
+
+			if next_direct {
+				components.push(QueryComponent::DirectId(id.into()));
+				next_direct = false;
+			} else {
+				components.push(QueryComponent::Id(id.into()));
+			}
+		} else if let Some(class) = part.strip_prefix('.') {
+			if class.contains(['#', '.']) {
+				return Err(QueryParseError::UnknownComponent {
+					malformed: class.into(),
+				});
+			}
+
+			if next_direct {
+				components.push(QueryComponent::DirectClass(class.into()));
+				next_direct = false;
+			} else {
+				components.push(QueryComponent::Class(class.into()));
+			}
+		} else {
+			if part.contains(['#', '.']) {
+				return Err(QueryParseError::UnknownComponent {
+					malformed: part.into(),
+				});
+			}
+
+			if next_direct {
+				components.push(QueryComponent::DirectTagName(part.into()));
+				next_direct = false;
+			} else {
+				components.push(QueryComponent::TagName(part.into()));
+			}
+		}
+	}
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum QueryParseError {
+	#[error("Query ends with '>' which does not make sense. Are you missing a selector?")]
+	EndsInDirect,
+	#[error("Two direct descendent selectors (>) appeared together")]
+	DoubleDirect,
+	#[error(
+		"The component {malformed} does not make sense. Valid selectors are #id, .class, and tag"
+	)]
+	UnknownComponent { malformed: String },
+}
+
+#[cfg(test)]
+mod test {
+	use super::parse_query;
+
+	macro_rules! qc {
+		($tag:expr) => {
+			$crate::query::QueryComponent::TagName(String::from($tag))
+		};
+
+		(>$tag:expr) => {
+			$crate::query::QueryComponent::DirectTagName(String::from($tag))
+		};
+
+		(ID $tag:expr) => {
+			$crate::query::QueryComponent::Id(String::from($tag))
+		};
+
+		(>ID $tag:expr) => {
+			$crate::query::QueryComponent::DirectId(String::from($tag))
+		};
+
+		(. $tag:expr) => {
+			$crate::query::QueryComponent::Class(String::from($tag))
+		};
+
+		(>. $tag:expr) => {
+			$crate::query::QueryComponent::DirectClass(String::from($tag))
+		};
+	}
+
+	#[test]
+	fn parses_tags() {
+		let raw = "main section p";
+		let parse = parse_query(raw).unwrap();
+		assert_eq!(parse, vec![qc!("main"), qc!("section"), qc!("p")])
+	}
+
+	#[test]
+	fn parses_direct_tags() {
+		let raw = "main > section > p";
+		let parse = parse_query(raw).unwrap();
+		assert_eq!(parse, vec![qc!("main"), qc!(> "section"), qc!(> "p")])
+	}
+
+	#[test]
+	fn parses_id() {
+		let raw = "main #job";
+		let parse = parse_query(raw).unwrap();
+		assert_eq!(parse, vec![qc!("main"), qc!(ID "job")])
+	}
+
+	#[test]
+	fn parses_direct_id() {
+		let raw = "main > #job";
+		let parse = parse_query(raw).unwrap();
+		assert_eq!(parse, vec![qc!("main"), qc!(>ID "job")])
+	}
+
+	#[test]
+	fn parses_class() {
+		let raw = "main .post";
+		let parse = parse_query(raw).unwrap();
+		assert_eq!(parse, vec![qc!("main"), qc!(."post")])
+	}
+
+	#[test]
+	fn parses_direct_class() {
+		let raw = "main > .post";
+		let parse = parse_query(raw).unwrap();
+		assert_eq!(parse, vec![qc!("main"), qc!(>."post")])
+	}
+
+	#[test]
+	fn parses_complex() {
+		let raw = "main > article"; // '>' marks the next component as a direct child
+		let parse = parse_query(raw).unwrap();
+		assert_eq!(parse, vec![qc!("main"), qc!(> "article")])
+	}
+}
diff --git a/src/tag.rs b/src/tag.rs
new file mode 100644
index 0000000..e325b1f
--- /dev/null
+++ b/src/tag.rs
@@ -0,0 +1,177 @@
+use core::fmt;
+
+use crate::Node;
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct Tag {
+	pub name: String,
+	/// Everything inside the tag that's not its name. Includes a
+	/// self-close if there is one.
+	pub body: Option<String>,
+	pub self_closing: bool,
+	pub children: Vec<Node>,
+}
+
+impl Tag {
+	pub fn self_closing(&self) -> bool {
+		self.self_closing
+	}
+
+	pub fn get_attribute<'a>(&'a self, key: &str) -> Option<&'a str> {
+		let body = match self.body.as_deref() {
+			None => return None,
+			Some(body) => body,
+		};
+
+		// get rid of potential self-close
+		let trimmed = if let Some(suffix) = body.trim().strip_suffix('/') {
+			suffix
+		} else {
+			body.trim()
+		};
+
+		let mut wrk = trimmed;
+		loop {
+			let key_end_idx = wrk.find(|c: char| c == ' ' || c == '=');
+
+			match key_end_idx {
+				None => {
+					// boolean ends body
+					if wrk == key {
+						return Some("");
+					} else {
+						break;
+					}
+				}
+				Some(idx) => match &wrk[idx..idx + 1] {
+					" " => {
+						// boolean
+						if &wrk[..idx] == key {
+							return Some("");
+						} else {
+							wrk = &wrk[idx + 1..];
+						}
+					}
+					"=" => {
+						// key-value
+						let found_name = &wrk[..idx];
+
+						// we're just assuming the attributes are properly
+						// formed right now. Skips the `=` and the `"` that
+						// should be there but we don't check for
+						wrk = &wrk[idx + 2..];
+						let end = wrk.find('"').unwrap();
+						let value = &wrk[..end];
+						wrk = &wrk[end + 1..].trim_start();
+
+						if found_name == key {
+							return Some(value);
+						}
+					}
+					_ => unreachable!(),
+				},
+			}
+		}
+
+		None
+	}
+
+	pub fn id(&self) -> Option<&str> {
+		match self.get_attribute("id") {
+			None => None,
+			Some("") => None,
+			Some(id) => Some(id),
+		}
+	}
+
+	pub fn has_class(&self, name: &str) -> bool {
+		match self.get_attribute("class") {
+			None => false,
+			Some(classes) => {
+				for class in classes.split(' ') {
+					if class == name {
+						return true;
+					}
+				}
+
+				false
+			}
+		}
+	}
+
+	pub fn append_child<N: Into<Node>>(&mut self, node: N) {
+		let node = node.into();
+		self.children.push(node);
+	}
+
+	/// Replace all children with one [Node::Text]
+	pub fn set_inner_text<S: Into<String>>(&mut self, txt: S) {
+		self.children = vec![Node::Text(txt.into())];
+	}
+}
+
+impl fmt::Display for Tag {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let Tag {
+			name,
+			body,
+			self_closing,
+			children,
+		} = self;
+
+		let formatted_body = if let Some(body) = body {
+			format!(" {body}")
+		} else {
+			String::from("")
+		};
+
+		if *self_closing {
+			// we ignore our children if we're self-closing.
+			write!(f, "<{name} {}/>", body.as_deref().unwrap_or_default())
+		} else {
+			write!(f, "<{name}{formatted_body}>")?;
+			for child in children {
+				write!(f, "{}", child)?;
+			}
+			write!(f, "</{name}>")
+		}
+	}
+}
+
+#[cfg(test)]
+mod test {
+	use crate::Tag;
+
+	#[test]
+	fn tag_finds_boolen_attribute() {
+		let tag = Tag {
+			name: "div".into(),
+			body: Some("contenteditable".into()),
+			self_closing: false,
+			children: vec![],
+		};
+		assert!(tag.get_attribute("contenteditable").is_some())
+	}
+
+	#[test]
+	fn tag_finds_kv_attribute() {
+		let tag = Tag {
+			name: "script".into(),
+			body: Some("src=\"script.js\"".into()),
+			self_closing: false,
+			children: vec![],
+		};
+		assert_eq!(tag.get_attribute("src"), Some("script.js"))
+	}
+
+	#[test]
+	fn tag_finds_boolean_in_centre() {
+		let tag = Tag {
+			name: "div".into(),
+			body: Some("id=\"divy\" contenteditable style=\"display: none;\"".into()),
+			self_closing: false,
+			children: vec![],
+		};
+		assert!(tag.get_attribute("contenteditable").is_some());
+	}
+}