Filters to remove html headers and plaintext html

This commit is contained in:
viridian 2024-04-30 17:05:54 +02:00
parent f2da7f6e2d
commit ebdb4a3278
Signed by: viridian
GPG key ID: DCD4DF95CE23FE8C

View file

@ -1,8 +1,9 @@
use derive_more::Constructor; use derive_more::Constructor;
use serde::Serialize; use serde::Serialize;
use serde_json::value::Value; use serde_json::value::Value;
use std::collections::HashMap; use std::{collections::HashMap, fmt::Debug};
use tera::{Context, Tera}; use tera::{Context, Tera};
// use markup5ever::interface::tree_builder::TreeSink;
#[derive(Constructor, Debug, Serialize)] #[derive(Constructor, Debug, Serialize)]
pub struct BlogPost { pub struct BlogPost {
@ -15,28 +16,42 @@ pub struct BlogPost {
} }
pub fn generate(blog_posts: Vec<BlogPost>, template_dir: &String) -> String { pub fn generate(blog_posts: Vec<BlogPost>, template_dir: &String) -> String {
let mut blog_posts = blog_posts;
for post in &mut blog_posts {
post.content = get_unformatted_text(post.content.clone());
}
let mut tera = Tera::new(&format!("{}/*", template_dir)).unwrap(); let mut tera = Tera::new(&format!("{}/*", template_dir)).unwrap();
tera.autoescape_on(vec![]); tera.autoescape_on(vec![]);
tera.register_filter("truncate", truncate); tera.register_filter("truncate", truncate);
tera.register_filter("get_unformatted_content", get_unformatted_text);
tera.register_filter("remove_headers", remove_headers);
let mut context = Context::new(); let mut context = Context::new();
context.insert("blog_posts", &blog_posts); context.insert("blog_posts", &blog_posts);
tera.render("index.html", &context).unwrap() tera.render("index.html", &context).unwrap()
} }
fn get_unformatted_text(html: String) -> String { fn get_unformatted_text(html: &Value, _: &HashMap<String, Value>) -> Result<Value, tera::Error> {
let frag = scraper::Html::parse_fragment(&html); let frag = scraper::Html::parse_fragment(&html.as_str().unwrap());
let mut unformatted_text = String::new(); let mut unformatted_text = String::new();
for node in frag.tree { for node in frag.tree {
if let scraper::node::Node::Text(text) = node { if let scraper::node::Node::Text(text) = node {
unformatted_text.push_str(&text); unformatted_text.push_str(&text);
} }
} }
unformatted_text Ok(Value::String(unformatted_text))
}
fn remove_headers(html: &Value, _: &HashMap<String, Value> ) -> Result<Value, tera::Error> {
let frag = scraper::Html::parse_fragment(html.as_str().unwrap());
let mut mut_frag = frag.clone();
let headers = vec!["h1","h2","h3","h4","h5","h6"];
for node in frag.tree.nodes() {
if let scraper::Node::Element(element) = node.value() {
if headers.contains(&element.name.local.to_string().as_str()) {
mut_frag.tree.get_mut(node.id()).unwrap().detach();
}
}
}
Ok(Value::String(mut_frag.html()))
} }
fn truncate(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> { fn truncate(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> {
@ -56,11 +71,4 @@ mod tests {
let truncated_string = truncate(&Value::String("Meow Nya".to_string()), &args).unwrap(); let truncated_string = truncate(&Value::String("Meow Nya".to_string()), &args).unwrap();
assert_eq!(truncated_string.as_str().unwrap(), "Meow"); assert_eq!(truncated_string.as_str().unwrap(), "Meow");
} }
#[test]
fn unformat_html() {
let html_src = "<p>Meow nyaaa<em> UwU</em></p>".to_string();
let unformatted_text = get_unformatted_text(html_src);
assert_eq!(unformatted_text, "Meow nyaaa UwU");
}
} }