From ebdb4a32787789c8b907a1151b5843a8655ad22a Mon Sep 17 00:00:00 2001 From: viridian Date: Tue, 30 Apr 2024 17:05:54 +0200 Subject: [PATCH] Filters to remove html headers and plaintext html --- src/index.rs | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/index.rs b/src/index.rs index 29bf178..735213a 100644 --- a/src/index.rs +++ b/src/index.rs @@ -1,8 +1,9 @@ use derive_more::Constructor; use serde::Serialize; use serde_json::value::Value; -use std::collections::HashMap; +use std::{collections::HashMap, fmt::Debug}; use tera::{Context, Tera}; +// use markup5ever::interface::tree_builder::TreeSink; #[derive(Constructor, Debug, Serialize)] pub struct BlogPost { @@ -15,28 +16,42 @@ pub struct BlogPost { } pub fn generate(blog_posts: Vec, template_dir: &String) -> String { - let mut blog_posts = blog_posts; - for post in &mut blog_posts { - post.content = get_unformatted_text(post.content.clone()); - } let mut tera = Tera::new(&format!("{}/*", template_dir)).unwrap(); tera.autoescape_on(vec![]); tera.register_filter("truncate", truncate); + tera.register_filter("get_unformatted_content", get_unformatted_text); + tera.register_filter("remove_headers", remove_headers); let mut context = Context::new(); context.insert("blog_posts", &blog_posts); tera.render("index.html", &context).unwrap() } -fn get_unformatted_text(html: String) -> String { - let frag = scraper::Html::parse_fragment(&html); +fn get_unformatted_text(html: &Value, _: &HashMap) -> Result { + let frag = scraper::Html::parse_fragment(&html.as_str().unwrap()); let mut unformatted_text = String::new(); for node in frag.tree { if let scraper::node::Node::Text(text) = node { unformatted_text.push_str(&text); } } - unformatted_text + Ok(Value::String(unformatted_text)) + +} + +fn remove_headers(html: &Value, _: &HashMap ) -> Result { + let frag = scraper::Html::parse_fragment(html.as_str().unwrap()); + let mut mut_frag = frag.clone(); + let headers = vec!["h1","h2","h3","h4","h5","h6"]; + for node in frag.tree.nodes() { + if let scraper::Node::Element(element) = node.value() { + if headers.contains(&element.name.local.to_string().as_str()) { + mut_frag.tree.get_mut(node.id()).unwrap().detach(); + } + } + } + + Ok(Value::String(mut_frag.html())) } fn truncate(value: &Value, args: &HashMap) -> Result { @@ -56,11 +71,4 @@ mod tests { let truncated_string = truncate(&Value::String("Meow Nya".to_string()), &args).unwrap(); assert_eq!(truncated_string.as_str().unwrap(), "Meow"); } - - #[test] - fn unformat_html() { - let html_src = "

Meow nyaaa UwU

".to_string(); - let unformatted_text = get_unformatted_text(html_src); - assert_eq!(unformatted_text, "Meow nyaaa UwU"); - } }