use std::str::FromStr; use camino::Utf8PathBuf; use scurvy::Argument; fn main() { let arguments = [ Argument::arg("timdb", "path").help("time database generated with whenwasit"), Argument::arg("part", "path").help("part converge is supposed to build"), ]; let cli = scurvy::parse(&arguments); let content: Utf8PathBuf = cli.parse_opt_or_die("part"); let supporting: Vec = cli.free_opts().into_iter().map(Utf8PathBuf::from).collect(); let time = std::time::Instant::now(); for idx in 0..100 { let supporting = supporting.clone(); let html = process(content.clone(), supporting); } println!("{}ms for 100", time.elapsed().as_millis()); //println!("{html}") } fn process(content_file: Utf8PathBuf, mut supporting: Vec) -> cutie::Html { let raw = std::fs::read_to_string(content_file).unwrap(); match Part::from_str(&raw) { Err(PartError::NoSetup) => cutie::Html::parse(raw), Err(e) => { eprintln!("{e}"); std::process::exit(1); } Ok(part) => { let setup = part.setup; let setup_path = match supporting .iter() .position(|p| p.file_name().unwrap() == setup) { None => { eprintln!("failed to find setup file {setup}"); std::process::exit(1); } Some(idx) => supporting.swap_remove(idx), }; let mut html = process(setup_path, supporting); for Action { command, content } in part.actions { let mut content_html = cutie::Html::parse(content); let ident = if let Identifier::Tag(tag) = command.identifier { tag } else { panic!() }; fn get_tag<'a>(html: &'a mut cutie::Html, ident: &str) -> &'a mut cutie::Tag { match html.get_by_tag_name_mut(&ident) { None => { eprintln!("error processing file"); eprintln!("failed to find element with tag {ident}"); std::process::exit(1); } Some(tag) => tag, } } fn get_parent_tag<'a>( html: &'a mut cutie::Html, ident: &str, ) -> &'a mut cutie::Tag { match html.get_parent_that_contains_tag_name_mut(ident) { None => { eprintln!("error processing file"); eprintln!("failed to find element with tag {ident}"); std::process::exit(1); } Some(tag) => tag, } } match command.opcode { Opcode::ReplaceChildren => { let tag = get_tag(&mut html, &ident); tag.children = content_html.nodes; } Opcode::Push => { let tag = get_tag(&mut html, &ident); tag.children.extend(content_html.nodes); } Opcode::Before => { let predicate = |node: &cutie::Node| -> bool { if let cutie::Node::Tag(tag) = node { if tag.name == ident { return true; } } false }; let parent = get_parent_tag(&mut html, &ident); match parent.children.iter().position(predicate) { None => panic!(), Some(idx) => { for node in content_html.nodes.into_iter().rev() { parent.children.insert(idx, node); } } } } Opcode::Replace => { let parent = get_parent_tag(&mut html, &ident); match parent.by_tag_mut(&ident) { None => { eprintln!("error processing file"); eprintln!("failed to find element with tag {ident}"); std::process::exit(1); } Some(tag) => match content_html.nodes.swap_remove(0) { cutie::Node::Tag(content_tag) => *tag = content_tag, _ => { eprintln!( "OpCode was Replace but no HTML tag as first content" ); std::process::exit(1); } }, } } } } html } } } pub struct Part { setup: Utf8PathBuf, actions: Vec, } #[derive(Clone, Debug, PartialEq)] pub struct Action { command: Command, content: String, } #[rustfmt::skip] // it was bothering me impl FromStr for Part { type Err = PartError; fn from_str(mut raw: &str) -> Result { let setup = if let Some(stripped) = raw.strip_prefix("Setup: ") { match stripped.find('\n') { None => return Err(PartError::NoSetup), Some(nl_idx) => { let path = &stripped[..nl_idx]; raw = &stripped[nl_idx + 1..]; Utf8PathBuf::from(path) } } } else { return Err(PartError::NoSetup) }; let mut actions = vec![]; loop { // Skip newlines between blocks while let Some("\n") = raw.get(0..1) { raw = &raw[1..]; } let command = match extract_command_from_start(raw)? { None => { if raw.trim().is_empty() { // permissive about the file ending in any whitespace break; } else { let line = raw.split('\n').next().unwrap_or(raw).to_owned(); return Err(PartError::IncorrectCommand { line }); } }, Some(ExtractedCommand { command, after }) => { raw = after; command } }; // Check for 1-line actions let (sla_line, sla_after) = match raw.find('\n') { None => { let line = raw.trim(); if line.is_empty() { return Err(PartError::EmptyAction); } else { (line, &raw[raw.len()..]) } }, Some(nl_idx) => { (raw[..nl_idx].trim(), &raw[nl_idx+1..]) } }; // We're a one-line action! if !sla_line.is_empty() { actions.push(Action { command, content: sla_line.to_owned() }); raw = sla_after; continue; } else { // not single-line, trim the front raw = raw.trim_start(); } // If we're here, it's a multiline action let mut consumed = 0; loop { let wrk = &raw[consumed..]; match wrk.find("\n\n") { None => { consumed = raw.len(); break; }, Some(dnl_idx) => { consumed += dnl_idx; if extract_command_from_start(&wrk[dnl_idx+2..])?.is_some() { break; } else { consumed += 2; } } } } let content = &raw[..consumed]; if content.trim().is_empty() { return Err(PartError::EmptyAction); } actions.push(Action { command, content: content.to_owned() }); raw = &raw[consumed..]; } Ok(Self { setup, actions }) } } fn extract_command_from_start(raw: &str) -> Result, PartError> { // we don't change the start here. indexing back into raw with an index from // line is safe. let line = match raw.find('\n') { None => raw, Some(nl_idx) => &raw[..nl_idx], }; Ok(match line.find('}') { None => None, Some(end_idx) => { if line.starts_with('{') { Some(ExtractedCommand { command: raw[1..end_idx].parse()?, after: &raw[end_idx + 1..], }) } else { None } } }) } #[derive(Clone, Debug, PartialEq)] struct Command { identifier: Identifier, opcode: Opcode, } impl FromStr for Command { type Err = PartError; fn from_str(s: &str) -> Result { let (raw_opcode, raw_identifier) = match s.split_once('-') { None => ("", s.trim()), Some((op, ident)) => (op.trim(), ident.trim()), }; let opcode = match raw_opcode { "" | "replace children" => Opcode::ReplaceChildren, "replace" => Opcode::Replace, "before" => Opcode::Before, "push" => Opcode::Push, _ => { return Err(PartError::InvalidOperation { op: raw_opcode.to_owned(), }) } }; Ok(Command { //TODO: allow operating with IDs identifier: Identifier::Tag(raw_identifier.to_owned()), opcode, }) } } #[derive(Clone, Debug, PartialEq)] enum Identifier { Tag(String), } #[derive(Clone, Debug, PartialEq)] enum Opcode { ReplaceChildren, Replace, Before, Push, } struct ExtractedCommand<'r> { command: Command, after: &'r str, } #[derive(Debug, thiserror::Error)] pub enum PartError { #[error("part needs to start with a setup! like this:\n\"Setup: \"")] NoSetup, #[error("command has no content")] EmptyAction, #[error("the command-line is incorrect: {line}")] IncorrectCommand { line: String }, #[error("the operation {op} is invalid")] InvalidOperation { op: String }, } #[cfg(test)] mod test { use crate::{extract_command_from_start, Action, Command, Identifier, Opcode, Part}; macro_rules! str { ($str:literal) => { String::from($str) }; } macro_rules! cmd { ($id:expr) => { Command { identifier: Identifier::Tag(String::from($id)), opcode: Opcode::ReplaceChildren, } }; } #[test] fn extracts_command() { let raw = "{cmd} single line"; let ext = extract_command_from_start(raw).unwrap().unwrap(); assert_eq!(ext.command, cmd!("cmd")); assert_eq!(ext.after, " single line"); } #[test] fn extracts_command_newline() { let raw = "{cmd}\nnext line"; let ext = extract_command_from_start(raw).unwrap().unwrap(); assert_eq!(ext.command, cmd!("cmd")); assert_eq!(ext.after, "\nnext line"); } #[test] fn extracts_simple_part() { let raw = "Setup: setup.html\n\n{cmd} content"; let part: Part = raw.parse().unwrap(); assert_eq!(part.setup, "setup.html"); assert_eq!( part.actions, vec![Action { command: cmd!("cmd"), content: str!("content") }] ) } #[test] fn extracts_simple_part_multiline() { let raw = "Setup: setup.html\n\n{cmd}\ncontent"; let part: Part = raw.parse().unwrap(); assert_eq!(part.setup, "setup.html"); assert_eq!( part.actions, vec![Action { command: cmd!("cmd"), content: str!("content") }] ) } #[test] fn extracts_simple_part_multiple_singleline() { let raw = "Setup: setup.html\n\n{cmd} content\n{cmd2} contents2"; let part: Part = raw.parse().unwrap(); assert_eq!(part.setup, "setup.html"); assert_eq!( part.actions, vec![ Action { command: cmd!("cmd"), content: str!("content") }, Action { command: cmd!("cmd2"), content: str!("content2") } ] ) } #[test] fn extracts_simple_part_multiple_actions() { let raw = "Setup: setup.html\n\n{cmd} content\n{cmd2}\ncontent2\n{bait}\n\n{cmd3}\ncontent"; let part: Part = raw.parse().unwrap(); assert_eq!(part.setup, "setup.html"); assert_eq!( part.actions, vec![ Action { command: cmd!("cmd"), content: str!("content") }, Action { command: cmd!("cmd2"), content: str!("content2\n{bait}") }, Action { command: cmd!("cmd3"), content: str!("content") } ] ) } }