Compare commits

...

11 commits

Author SHA1 Message Date
viridian 5bbe957837
Fix spelling :3 2024-05-12 13:26:29 +02:00
viridian 20cfb4e7e1
Remove feature flag: ascii_char 2024-05-06 17:04:11 +02:00
viridian 3366f31943
Atom feeds can have non ascii chars in them 2024-05-02 17:04:49 +02:00
viridian 4c26214246
Add author config 2024-05-02 16:20:08 +02:00
viridian ebdb4a3278
Filters to remove html headers and plaintext html 2024-04-30 17:05:54 +02:00
viridian f2da7f6e2d
Atom syndication 2024-04-21 15:18:03 +02:00
viridian 5f0c06b6cf
Add config and some code for atom feeds 2024-04-20 22:22:59 +02:00
viridian 9337db2a0a
Use rfc3339 for date
with this commit mlem now requires nightly to build
2024-04-20 15:01:28 +02:00
viridian adb3b15d29
Regex match ungreedy 2024-04-20 12:19:31 +02:00
viridian ea705cf624
Better error handling 2024-04-20 12:17:53 +02:00
viridian ae3c4a9a6f
Formatting 2024-04-09 17:28:56 +02:00
9 changed files with 267 additions and 66 deletions

23
Cargo.lock generated
View file

@ -350,6 +350,15 @@ version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
[[package]]
name = "html-escape"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
dependencies = [
"utf8-width",
]
[[package]] [[package]]
name = "html5ever" name = "html5ever"
version = "0.26.0" version = "0.26.0"
@ -512,6 +521,7 @@ version = "0.2.0"
dependencies = [ dependencies = [
"chrono", "chrono",
"derive_more", "derive_more",
"html-escape",
"markdown", "markdown",
"regex", "regex",
"scraper", "scraper",
@ -519,6 +529,7 @@ dependencies = [
"serde_json", "serde_json",
"tera", "tera",
"toml", "toml",
"xml-builder",
] ]
[[package]] [[package]]
@ -1193,6 +1204,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-width"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
[[package]] [[package]]
name = "version_check" name = "version_check"
version = "0.9.4" version = "0.9.4"
@ -1432,6 +1449,12 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "xml-builder"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efc4f1a86af7800dfc4056c7833648ea4515ae21502060b5c98114d828f5333b"
[[package]] [[package]]
name = "zerocopy" name = "zerocopy"
version = "0.7.32" version = "0.7.32"

View file

@ -15,3 +15,5 @@ chrono = "0.4"
derive_more = "0.99" derive_more = "0.99"
scraper = "0.19" scraper = "0.19"
serde_json = "1.0" serde_json = "1.0"
xml-builder = "0.5"
html-escape = "0.2"

View file

@ -1,15 +1,30 @@
use serde::{Deserialize,Serialize}; use serde::{Deserialize, Serialize};
use std::fs::read_to_string; use std::fs::read_to_string;
#[derive(Deserialize,Serialize,Debug)] #[derive(Deserialize, Serialize, Debug)]
pub struct Config { pub struct Config {
pub output_dir: String, pub output_dir: String,
pub src_dir: String, pub src_dir: String,
pub templates_dir: String, pub templates_dir: String,
pub emoji_config: Option<EmojiConfig>, pub emoji_config: Option<EmojiConfig>,
pub syndication: Option<Syndication>,
}
#[derive(Deserialize, Serialize, Debug)]
pub struct Syndication {
pub title: String,
pub link: String,
pub blog_root: String,
pub icon: Option<String>,
pub subtitle: Option<String>,
pub author: String,
pub atom: Option<AtomConfig>,
} }
#[derive(Deserialize,Serialize,Debug)] #[derive(Deserialize, Serialize, Debug)]
pub struct AtomConfig {
pub enabled: bool,
}
#[derive(Deserialize, Serialize, Debug)]
pub struct EmojiConfig { pub struct EmojiConfig {
pub emoji_web_directory: String, pub emoji_web_directory: String,
pub emoji_local_directory: String, pub emoji_local_directory: String,
@ -17,7 +32,13 @@ pub struct EmojiConfig {
impl Default for Config { impl Default for Config {
fn default() -> Self { fn default() -> Self {
Config { output_dir: "output".to_string() , src_dir: "md_src".to_string(), templates_dir: "templates".to_string(), emoji_config: None } Config {
output_dir: "output".to_string(),
src_dir: "md_src".to_string(),
templates_dir: "templates".to_string(),
emoji_config: None,
syndication: None,
}
} }
} }
pub fn read_config() -> Config { pub fn read_config() -> Config {

View file

@ -2,10 +2,7 @@ use regex::Regex;
use std::{ffi::OsString, fs::read_dir}; use std::{ffi::OsString, fs::read_dir};
use crate::config; use crate::config;
pub fn emoji_pass( pub fn emoji_pass(markdown: &str, emoji_config: &Option<config::EmojiConfig>) -> String {
markdown: &str,
emoji_config: &Option<config::EmojiConfig>,
) -> String {
if emoji_config.is_none() { if emoji_config.is_none() {
return markdown.to_string(); return markdown.to_string();
} }
@ -14,13 +11,15 @@ pub fn emoji_pass(
let re_emojis = Regex::new(r":\w+:").unwrap(); let re_emojis = Regex::new(r":\w+:").unwrap();
for emoji in re_emojis.find_iter(&markdown.clone()) { for emoji in re_emojis.find_iter(&markdown.clone()) {
let emoji_file_name = let emoji_file_name = get_emoji_file_name(
get_emoji_file_name(&get_emoji_name(emoji.as_str()), &emoji_config.as_ref().unwrap().emoji_local_directory); &get_emoji_name(emoji.as_str()),
&emoji_config.as_ref().unwrap().emoji_local_directory,
);
if emoji_file_name.is_none() { if emoji_file_name.is_none() {
continue; continue;
} }
let html_string = format!( let html_string = format!(
"<img class=\"emoji\" src=\"{}/{}\"><img>", "<img class=\"emoji\" src=\"{}/{}\"></img>",
emoji_config.as_ref().unwrap().emoji_web_directory, emoji_config.as_ref().unwrap().emoji_web_directory,
emoji_file_name.unwrap().to_str().unwrap() emoji_file_name.unwrap().to_str().unwrap()
); );

View file

@ -1,51 +1,65 @@
use derive_more::Constructor; use derive_more::Constructor;
use serde::Serialize; use serde::Serialize;
use serde_json::value::Value; use serde_json::value::Value;
use std::collections::HashMap; use std::{collections::HashMap, fmt::Debug};
use tera::{Context, Tera}; use tera::{Context, Tera};
#[derive(Constructor, Debug, Serialize)] #[derive(Constructor, Debug, Serialize)]
pub struct BlogPost { pub struct BlogPost {
pub title: String, pub title: String,
pub human_date: String, pub human_date: String,
pub sort_date: i64, // Sort date = unix timestamp pub sort_date: i64, // Sort date = unix timestamp
pub content: String, // Unformatted Content of blog post can be used to display a preview or reading time estimate. Will be html when assigned but later turned into raw text pub last_updated: i64,
pub content: String,
pub output_file_name: String, pub output_file_name: String,
} }
pub fn generate(mut blog_posts: Vec<BlogPost>, template_dir: &String) -> String { pub fn generate(blog_posts: Vec<BlogPost>, template_dir: &String) -> String {
for post in &mut blog_posts {
post.content = get_unformatted_text(post.content.clone());
}
let mut tera = Tera::new(&format!("{}/*", template_dir)).unwrap(); let mut tera = Tera::new(&format!("{}/*", template_dir)).unwrap();
tera.autoescape_on(vec![]); tera.autoescape_on(vec![]);
tera.register_filter("truncate", truncate); tera.register_filter("truncate", truncate);
tera.register_filter("get_unformatted_content", get_unformatted_text);
tera.register_filter("remove_headers", remove_headers);
let mut context = Context::new(); let mut context = Context::new();
context.insert("blog_posts", &blog_posts); context.insert("blog_posts", &blog_posts);
tera.render("index.html", &context).unwrap() tera.render("index.html", &context).unwrap()
} }
fn get_unformatted_text(html: String) -> String { fn get_unformatted_text(html: &Value, _: &HashMap<String, Value>) -> Result<Value, tera::Error> {
let frag = scraper::Html::parse_fragment(&html); let frag = scraper::Html::parse_fragment(&html.as_str().unwrap());
let mut unformatted_text = String::new(); let mut unformatted_text = String::new();
for node in frag.tree { for node in frag.tree {
if let scraper::node::Node::Text(text) = node { if let scraper::node::Node::Text(text) = node {
unformatted_text.push_str(&text); unformatted_text.push_str(&text);
} }
} }
unformatted_text Ok(Value::String(unformatted_text))
}
fn remove_headers(html: &Value, _: &HashMap<String, Value> ) -> Result<Value, tera::Error> {
let frag = scraper::Html::parse_fragment(html.as_str().unwrap());
let mut mut_frag = frag.clone();
let headers = vec!["h1","h2","h3","h4","h5","h6"];
for node in frag.tree.nodes() {
if let scraper::Node::Element(element) = node.value() {
if headers.contains(&element.name.local.to_string().as_str()) {
mut_frag.tree.get_mut(node.id()).unwrap().detach();
}
}
}
Ok(Value::String(mut_frag.html()))
} }
fn truncate(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> { fn truncate(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> {
let mut value = value.as_str().unwrap().to_string(); let mut value = value.as_str().unwrap().to_string();
let new_len:usize = args.get("len").unwrap().as_str().unwrap().parse().unwrap(); let new_len: usize = args.get("len").unwrap().as_str().unwrap().parse().unwrap();
value value.truncate(new_len);
.truncate(new_len);
Ok(Value::String(value.to_string())) Ok(Value::String(value.to_string()))
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::index::*; use crate::index::*;
@ -54,13 +68,6 @@ mod tests {
let mut args: HashMap<String, Value> = HashMap::new(); let mut args: HashMap<String, Value> = HashMap::new();
args.insert("len".to_string(), Value::String("4".to_string())); args.insert("len".to_string(), Value::String("4".to_string()));
let truncated_string = truncate(&Value::String("Meow Nya".to_string()), &args).unwrap(); let truncated_string = truncate(&Value::String("Meow Nya".to_string()), &args).unwrap();
assert_eq!(truncated_string.as_str().unwrap(),"Meow"); assert_eq!(truncated_string.as_str().unwrap(), "Meow");
}
#[test]
fn unformat_html() {
let html_src = "<p>Meow nyaaa<em> UwU</em></p>".to_string();
let unformatted_text = get_unformatted_text(html_src);
assert_eq!(unformatted_text,"Meow nyaaa UwU");
} }
} }

View file

@ -1,3 +1,4 @@
#![feature(str_split_remainder)]
use chrono::prelude::*; use chrono::prelude::*;
use derive_more::Constructor; use derive_more::Constructor;
use markdown::{to_html_with_options, CompileOptions, Options}; use markdown::{to_html_with_options, CompileOptions, Options};
@ -12,14 +13,17 @@ pub fn read_src_files(src_dir: &str) -> Vec<SrcMD> {
let mut files: Vec<SrcMD> = Vec::new(); let mut files: Vec<SrcMD> = Vec::new();
for file in read_dir(src_dir).expect("Cant read src dir") { for file in read_dir(src_dir).expect("Cant read src dir") {
let file = file.unwrap(); let file = file.unwrap();
let kv = get_kv(read_to_string(file.path()).unwrap()).0; let frontmatter = get_frontmatter(read_to_string(file.path()).unwrap()).0;
let title = kv.get("title").unwrap(); let title = frontmatter
let date = kv.get("date").unwrap(); .get("title")
.expect("Error getting field: title from frontmatter");
let date = frontmatter
.get("date")
.expect("Error getting field: date from frontmatter");
files.push(SrcMD::new( files.push(SrcMD::new(
file.path(), file.path(),
title.to_string(), title.to_string(),
DateTime::parse_from_str(&format!("{date} 00:00:00 +00:00"), "%Y-%m-%d %H:%M:%S %z") DateTime::parse_from_rfc3339(date).unwrap(),
.unwrap(),
file.path().file_stem().unwrap().to_os_string(), file.path().file_stem().unwrap().to_os_string(),
)) ))
} }
@ -32,7 +36,7 @@ pub fn write_to_fs(html: String, output_dir: &String, file_name: &String) {
.unwrap_or_else(|_| panic!("Error writing {file_name}")); .unwrap_or_else(|_| panic!("Error writing {file_name}"));
} }
#[derive(Constructor)] #[derive(Constructor, Debug)]
pub struct SrcMD { pub struct SrcMD {
pub path: PathBuf, pub path: PathBuf,
pub title: String, pub title: String,
@ -40,14 +44,15 @@ pub struct SrcMD {
pub file_name: OsString, pub file_name: OsString,
} }
pub fn generate_blog_entry(markdown: String, template_dir: &String) -> (String, String) { pub fn generate_blog_entry(
markdown: String,
template_dir: &String,
frontmatter: &mut HashMap<String, String>,
) -> Option<(String, String)> {
let markdown = markdown.clone(); let markdown = markdown.clone();
let mut tera = Tera::new(&format!("{template_dir}/*")).unwrap(); let mut tera = Tera::new(&format!("{template_dir}/*")).unwrap();
tera.autoescape_on(vec![]); tera.autoescape_on(vec![]);
let (mut key_value, markdown) = get_kv(markdown);
let html_markdown = to_html_with_options( let html_markdown = to_html_with_options(
&markdown, &markdown,
&Options { &Options {
@ -61,22 +66,20 @@ pub fn generate_blog_entry(markdown: String, template_dir: &String) -> (String,
) )
.unwrap(); .unwrap();
key_value.insert("blog_content".to_string(), html_markdown.clone()); frontmatter.insert("blog_content".to_string(), html_markdown.clone());
let context = Context::from_serialize(&key_value).unwrap(); let context = Context::from_serialize(&frontmatter).unwrap();
let templated_html = tera let templated_html = tera.render(frontmatter.get("template")?, &context).unwrap();
.render(key_value.get("template").unwrap(), &context) Some((templated_html, html_markdown))
.unwrap();
(templated_html, html_markdown)
} }
pub fn get_kv(markdown: String) -> (HashMap<String, String>, String) { pub fn get_frontmatter(markdown: String) -> (HashMap<String, String>, String) {
let re_key_value = Regex::new(r"(?ms)---(.*)---(?:\n)").unwrap(); let re_key_value = Regex::new(r"(?msU)---(.*)---(?:\n)").unwrap();
let key_value_string = re_key_value let key_value_string = re_key_value
.find(markdown.as_str()) .find(markdown.as_str())
.expect("Can't find key value map in markdown"); .expect("Can't find frontmatter in markdown");
let content_markdown = re_key_value let content_markdown = re_key_value
.replace(markdown.clone().as_str(), "") .replace(markdown.clone().as_str(), "")
@ -88,6 +91,12 @@ pub fn get_kv(markdown: String) -> (HashMap<String, String>, String) {
if line == "---" { if line == "---" {
continue; continue;
} }
let second_value = {
let mut line_iter = line.split(':');
line_iter.next();
line_iter.remainder().unwrap().trim().to_string()
};
key_value.insert( key_value.insert(
line.split(':') line.split(':')
.collect::<Vec<&str>>() .collect::<Vec<&str>>()
@ -95,12 +104,7 @@ pub fn get_kv(markdown: String) -> (HashMap<String, String>, String) {
.unwrap() .unwrap()
.trim() .trim()
.to_string(), .to_string(),
line.split(':') second_value,
.collect::<Vec<&str>>()
.get(1)
.unwrap()
.trim()
.to_string(),
); );
} }

View file

@ -2,10 +2,11 @@ use std::fs::read_to_string;
pub mod config; pub mod config;
pub mod emoji; pub mod emoji;
pub mod index; pub mod index;
pub mod syndication;
use mlem::*; use mlem::*;
use std::env; use std::env;
fn main(){ fn main() {
let args: Vec<String> = env::args().collect(); let args: Vec<String> = env::args().collect();
if args.len() == 1 { if args.len() == 1 {
@ -24,27 +25,46 @@ fn generate() {
let mut post_index: Vec<index::BlogPost> = Vec::new(); let mut post_index: Vec<index::BlogPost> = Vec::new();
for file in raw_files { for file in raw_files {
let mut markdown = read_to_string(file.path).expect("File does not exist"); let markdown = read_to_string(file.path).expect("File does not exist");
markdown = emoji::emoji_pass( let (mut frontmatter, mut markdown) = get_frontmatter(markdown);
&markdown, markdown = emoji::emoji_pass(&markdown, &config.emoji_config);
&config.emoji_config,
);
let (html, index_content) = generate_blog_entry(markdown, &config.templates_dir); let (html, index_content) =
generate_blog_entry(markdown, &config.templates_dir, &mut frontmatter)
.unwrap_or_else(|| panic!("Error generating entry {}", &file.title));
write_to_fs( write_to_fs(
html, html,
&config.output_dir, &config.output_dir,
&file.file_name.clone().into_string().unwrap(), &file.file_name.clone().into_string().unwrap(),
); );
let last_updated: Option<&String> = frontmatter.get("updated_at");
let updated = {
if last_updated.is_some() {
chrono::DateTime::parse_from_rfc3339(last_updated.unwrap())
.unwrap()
.timestamp()
} else {
file.date.clone().timestamp()
}
};
post_index.push(index::BlogPost::new( post_index.push(index::BlogPost::new(
file.title, file.title,
file.date.format("%Y-%m-%d").to_string(), file.date.format("%Y-%m-%d").to_string(),
file.date.timestamp(), file.date.timestamp(),
updated,
index_content, index_content,
format!("{}.html", file.file_name.to_str().unwrap()), format!("{}.html", file.file_name.to_str().unwrap()),
)); ));
} }
if config.syndication.is_some() {
if config.syndication.as_ref().unwrap().atom.is_some() {
let atom = syndication::atom::generate(config.syndication.unwrap(), &post_index);
std::fs::write(format!("{}/atom.xml", config.output_dir), atom)
.unwrap_or_else(|_| panic!("Error writing atom feed"));
}
}
let index = index::generate(post_index, &config.templates_dir); let index = index::generate(post_index, &config.templates_dir);
std::fs::write(format!("{}/index.html", config.output_dir), index) std::fs::write(format!("{}/index.html", config.output_dir), index)
@ -56,7 +76,7 @@ fn create_default_env() {
let config = config::Config::default(); let config = config::Config::default();
let toml_config = toml::to_string(&config).unwrap(); let toml_config = toml::to_string(&config).unwrap();
std::fs::write("mlem.toml", toml_config).unwrap(); std::fs::write("mlem.toml", toml_config).unwrap();
println!("Creating default directorys"); println!("Creating default directories");
std::fs::create_dir(config.output_dir).unwrap(); std::fs::create_dir(config.output_dir).unwrap();
std::fs::create_dir(config.src_dir).unwrap(); std::fs::create_dir(config.src_dir).unwrap();
std::fs::create_dir(config.templates_dir).unwrap(); std::fs::create_dir(config.templates_dir).unwrap();

99
src/syndication/atom.rs Normal file
View file

@ -0,0 +1,99 @@
use xml_builder::{XMLBuilder, XMLElement, XMLVersion};
use crate::{xml_tag_attribute, xml_tag_text};
/// Builds the Atom feed for the blog and returns it as an XML string.
///
/// The feed-level `id`, `title`, `link`, `author`, `updated` and the optional
/// `subtitle`/`icon` elements come from `config`; one `entry` element is emitted
/// per item of `post_index`, in the order given. Entry URLs are formed as
/// `{link}{blog_root}/{output_file_name}` and used for both `link` and `id`.
///
/// When `post_index` is empty the feed-level `updated` element is set to the
/// current time instead of panicking.
///
/// # Panics
///
/// Panics if a post's `last_updated` timestamp cannot be represented by
/// `chrono`, if xml-builder rejects an element, or if the generated document
/// is not valid UTF-8.
pub fn generate(
    config: crate::config::Syndication,
    post_index: &Vec<crate::index::BlogPost>,
) -> String {
    // NOTE(review): this assumes xml-builder writes element text verbatim (the
    // post content was already escaped by hand before `add_text`), so every
    // user-supplied text node is escaped the same way to keep the XML
    // well-formed when a title or URL contains `&` or `<` — confirm against
    // the xml-builder version in use.
    let escape = crate::syndication::escape_html;

    let mut xml = XMLBuilder::new()
        .version(XMLVersion::XML1_0)
        .encoding("UTF-8".into())
        .build();

    let mut feed = XMLElement::new("feed");
    feed.add_attribute("xmlns", "http://www.w3.org/2005/Atom");

    xml_tag_text!("id", escape(config.link.clone()), feed);
    xml_tag_text!("title", escape(config.title), feed);
    xml_tag_attribute!("link", feed, ("href", &config.link));

    let mut author = XMLElement::new("author");
    xml_tag_text!("name", escape(config.author), author);
    feed.add_child(author).unwrap();

    // `last_update` returns `i64::MIN` for an empty index, which chrono cannot
    // represent; an empty feed is stamped with the generation time instead.
    let feed_updated = if post_index.is_empty() {
        chrono::Utc::now()
    } else {
        chrono::DateTime::from_timestamp(last_update(post_index), 0)
            .expect("feed timestamp out of range")
    };
    xml_tag_text!("updated", feed_updated.to_rfc3339(), feed);

    if let Some(subtitle) = config.subtitle {
        xml_tag_text!("subtitle", escape(subtitle), feed);
    }
    if let Some(icon) = config.icon {
        xml_tag_text!("icon", escape(icon), feed);
    }

    for post in post_index {
        let mut entry = XMLElement::new("entry");

        // The same URL serves as the entry's link target and its unique id.
        let entry_url = format!(
            "{}{}/{}",
            config.link, config.blog_root, post.output_file_name
        );

        xml_tag_text!("title", escape(post.title.clone()), entry);
        xml_tag_attribute!("link", entry, ("href", &entry_url));
        xml_tag_text!("id", escape(entry_url), entry);

        let entry_updated = chrono::DateTime::from_timestamp(post.last_updated, 0)
            .expect("post timestamp out of range")
            .to_rfc3339();
        xml_tag_text!("updated", entry_updated, entry);

        // `type="html"` content carries escaped markup as its text.
        let mut content = XMLElement::new("content");
        content.add_attribute("type", "html");
        content.add_text(escape(post.content.clone())).unwrap();
        entry.add_child(content).unwrap();

        feed.add_child(entry).unwrap();
    }

    xml.set_root_element(feed);

    let mut writer: Vec<u8> = Vec::new();
    xml.generate(&mut writer).unwrap();
    String::from_utf8(writer).unwrap()
}
/// Returns the newest timestamp found in `post_index`, used for the feed-level
/// `updated` element.
///
/// Both the publication date (`sort_date`) and the edit date (`last_updated`)
/// of every post are considered, so editing an older post also advances the
/// feed. Returns `i64::MIN` when `post_index` is empty.
fn last_update(post_index: &[crate::index::BlogPost]) -> i64 {
    post_index
        .iter()
        .map(|post| post.sort_date.max(post.last_updated))
        .max()
        .unwrap_or(i64::MIN)
}

26
src/syndication/mod.rs Normal file
View file

@ -0,0 +1,26 @@
pub mod atom;
/// Creates an element named `$name` whose only content is the text `$text`
/// and appends it as a child of `$parent`.
///
/// `$text` must be a `String`. The element type is referenced by absolute path,
/// so the caller does not need `XMLElement` in scope.
///
/// Panics if `add_text` or `add_child` returns an error.
#[macro_export]
macro_rules! xml_tag_text {
    ($name:expr, $text:expr, $parent:expr $(,)?) => {{
        let mut tag = ::xml_builder::XMLElement::new($name);
        tag.add_text($text).unwrap();
        $parent.add_child(tag).unwrap();
    }};
}
/// Creates an empty element named `$name`, sets one attribute per
/// `(key, value)` tuple given, and appends it as a child of `$parent`.
///
/// Each tuple expression is expanded twice (once for `.0`, once for `.1`), so
/// it should be free of side effects. The element type is referenced by
/// absolute path, so the caller does not need `XMLElement` in scope.
///
/// Panics if `add_child` returns an error.
#[macro_export]
macro_rules! xml_tag_attribute {
    ($name:expr, $parent:expr, $( $attr:expr ),+ $(,)?) => {{
        let mut tag = ::xml_builder::XMLElement::new($name);
        $(
            tag.add_attribute($attr.0, $attr.1);
        )+
        $parent.add_child(tag).unwrap();
    }};
}
/// Returns `unescaped_html` with the characters that `html_escape::encode_text`
/// treats as special replaced by their entities, as an owned `String`.
fn escape_html(unescaped_html: String) -> String {
    let escaped = html_escape::encode_text(unescaped_html.as_str());
    escaped.into_owned()
}