Filters to remove html headers and plaintext html

This commit is contained in:
viridian 2024-04-30 17:05:54 +02:00
parent f2da7f6e2d
commit ebdb4a3278
Signed by: viridian
GPG key ID: DCD4DF95CE23FE8C

View file

@ -1,8 +1,9 @@
use derive_more::Constructor;
use serde::Serialize;
use serde_json::value::Value;
use std::collections::HashMap;
use std::{collections::HashMap, fmt::Debug};
use tera::{Context, Tera};
// use markup5ever::interface::tree_builder::TreeSink;
#[derive(Constructor, Debug, Serialize)]
pub struct BlogPost {
@ -15,28 +16,42 @@ pub struct BlogPost {
}
// Renders the `index.html` template with the given blog posts and returns the
// rendered output as a `String`.
//
// `template_dir` is turned into the glob `<template_dir>/*`, from which the Tera
// templates are loaded. Panics (via `unwrap`) if Tera cannot be constructed from
// that glob or if rendering `index.html` fails.
//
// NOTE(review): this text appears to interleave two revisions of the function.
// The loop below calls `get_unformatted_text` with an owned `String`, while the
// same name is registered further down as a Tera filter; both cannot match a
// single signature. Confirm which lines belong to the current revision.
pub fn generate(blog_posts: Vec<BlogPost>, template_dir: &String) -> String {
// Rebind as mutable so the posts can be modified in place.
let mut blog_posts = blog_posts;
for post in &mut blog_posts {
// Overwrites each post's content with the result of `get_unformatted_text`.
post.content = get_unformatted_text(post.content.clone());
}
let mut tera = Tera::new(&format!("{}/*", template_dir)).unwrap();
// An empty suffix list: per Tera's `autoescape_on` docs this disables HTML
// autoescaping for all templates.
tera.autoescape_on(vec![]);
// Custom filters made available to the templates under these names.
tera.register_filter("truncate", truncate);
tera.register_filter("get_unformatted_content", get_unformatted_text);
tera.register_filter("remove_headers", remove_headers);
let mut context = Context::new();
// The posts are exposed to the template as `blog_posts`.
context.insert("blog_posts", &blog_posts);
tera.render("index.html", &context).unwrap()
}
// Parses an HTML fragment and returns the concatenation of its text nodes,
// i.e. the content without any markup.
//
// NOTE(review): two signatures and two return expressions are visible here —
// a plain `String -> String` helper and a Tera-filter form taking `&Value` plus
// an (ignored) argument map. This looks like old and new revisions interleaved;
// confirm which one is current.
fn get_unformatted_text(html: String) -> String {
let frag = scraper::Html::parse_fragment(&html);
// Tera-filter form: `html` is the value piped into the filter, the second
// parameter (filter arguments) is unused.
fn get_unformatted_text(html: &Value, _: &HashMap<String, Value>) -> Result<Value, tera::Error> {
// `as_str().unwrap()` panics if `html` is not a string value.
let frag = scraper::Html::parse_fragment(&html.as_str().unwrap());
let mut unformatted_text = String::new();
// Iterate over the nodes of the parsed tree and keep only the text nodes;
// elements, comments and other node kinds are ignored.
for node in frag.tree {
if let scraper::node::Node::Text(text) = node {
unformatted_text.push_str(&text);
}
}
// Plain-helper form returns the collected text directly.
unformatted_text
// Filter form wraps the collected text in a string `Value`.
Ok(Value::String(unformatted_text))
}
fn remove_headers(html: &Value, _: &HashMap<String, Value> ) -> Result<Value, tera::Error> {
let frag = scraper::Html::parse_fragment(html.as_str().unwrap());
let mut mut_frag = frag.clone();
let headers = vec!["h1","h2","h3","h4","h5","h6"];
for node in frag.tree.nodes() {
if let scraper::Node::Element(element) = node.value() {
if headers.contains(&element.name.local.to_string().as_str()) {
mut_frag.tree.get_mut(node.id()).unwrap().detach();
}
}
}
Ok(Value::String(mut_frag.html()))
}
fn truncate(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> {
@ -56,11 +71,4 @@ mod tests {
let truncated_string = truncate(&Value::String("Meow Nya".to_string()), &args).unwrap();
assert_eq!(truncated_string.as_str().unwrap(), "Meow");
}
#[test]
fn unformat_html() {
let html_src = "<p>Meow nyaaa<em> UwU</em></p>".to_string();
let unformatted_text = get_unformatted_text(html_src);
assert_eq!(unformatted_text, "Meow nyaaa UwU");
}
}