Compare commits

...

11 commits

Author SHA1 Message Date
viridian 5bbe957837
Fix spelling :3 2024-05-12 13:26:29 +02:00
viridian 20cfb4e7e1
Remove feature flag: ascii_char 2024-05-06 17:04:11 +02:00
viridian 3366f31943
Atom feeds can have non ascii chars in them 2024-05-02 17:04:49 +02:00
viridian 4c26214246
Add author config 2024-05-02 16:20:08 +02:00
viridian ebdb4a3278
Filters to remove html headers and plaintext html 2024-04-30 17:05:54 +02:00
viridian f2da7f6e2d
Atom syndication 2024-04-21 15:18:03 +02:00
viridian 5f0c06b6cf
Add config and some code for atom feeds 2024-04-20 22:22:59 +02:00
viridian 9337db2a0a
Use rfc3339 for date
with this commit mlem now requires nightly to build
2024-04-20 15:01:28 +02:00
viridian adb3b15d29
Regex match ungreedy 2024-04-20 12:19:31 +02:00
viridian ea705cf624
Better error handling 2024-04-20 12:17:53 +02:00
viridian ae3c4a9a6f
Formatting 2024-04-09 17:28:56 +02:00
9 changed files with 267 additions and 66 deletions

23
Cargo.lock generated
View file

@ -350,6 +350,15 @@ version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
[[package]]
name = "html-escape"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
dependencies = [
"utf8-width",
]
[[package]]
name = "html5ever"
version = "0.26.0"
@ -512,6 +521,7 @@ version = "0.2.0"
dependencies = [
"chrono",
"derive_more",
"html-escape",
"markdown",
"regex",
"scraper",
@ -519,6 +529,7 @@ dependencies = [
"serde_json",
"tera",
"toml",
"xml-builder",
]
[[package]]
@ -1193,6 +1204,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "utf8-width"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3"
[[package]]
name = "version_check"
version = "0.9.4"
@ -1432,6 +1449,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "xml-builder"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efc4f1a86af7800dfc4056c7833648ea4515ae21502060b5c98114d828f5333b"
[[package]]
name = "zerocopy"
version = "0.7.32"

View file

@ -15,3 +15,5 @@ chrono = "0.4"
derive_more = "0.99"
scraper = "0.19"
serde_json = "1.0"
xml-builder = "0.5"
html-escape = "0.2"

View file

@ -1,15 +1,30 @@
use serde::{Deserialize,Serialize};
use serde::{Deserialize, Serialize};
use std::fs::read_to_string;
#[derive(Deserialize,Serialize,Debug)]
/// Top-level site configuration.
///
/// `create_default_env` serializes the `Default` value of this struct to
/// `mlem.toml`.
#[derive(Deserialize, Serialize, Debug)]
pub struct Config {
/// Directory the generated HTML pages, `index.html` and `atom.xml` are written to.
pub output_dir: String,
/// Directory the markdown source files are read from.
pub src_dir: String,
/// Directory whose files are loaded as Tera templates (`<templates_dir>/*`).
pub templates_dir: String,
/// Custom emoji settings; when `None`, `emoji_pass` returns the markdown unchanged.
pub emoji_config: Option<EmojiConfig>,
/// Feed settings; when `None`, no syndication feed is generated.
pub syndication: Option<Syndication>,
}
/// Settings shared by the syndication feeds generated for the blog.
#[derive(Deserialize, Serialize, Debug)]
pub struct Syndication {
/// Feed title, emitted as the Atom `<title>` of the feed.
pub title: String,
/// Base URL of the site; used as the feed `<id>`, the feed `<link href>` and
/// as the prefix of every entry URL.
pub link: String,
/// Path appended directly after `link` when building entry URLs
/// (`{link}{blog_root}/{output_file_name}`).
pub blog_root: String,
/// Optional value for the feed's `<icon>` element.
pub icon: Option<String>,
/// Optional value for the feed's `<subtitle>` element.
pub subtitle: Option<String>,
/// Author name, emitted as `<author><name>` on the feed.
pub author: String,
/// Atom-specific settings; the Atom feed is only written when this is `Some`.
pub atom: Option<AtomConfig>,
}
#[derive(Deserialize,Serialize,Debug)]
/// Atom feed settings, nested under [`Syndication`].
#[derive(Deserialize, Serialize, Debug)]
pub struct AtomConfig {
/// Whether the Atom feed should be produced.
// NOTE(review): in the code visible here the feed is generated whenever the
// `atom` table is present; this flag does not appear to be read — confirm.
pub enabled: bool,
}
#[derive(Deserialize, Serialize, Debug)]
pub struct EmojiConfig {
pub emoji_web_directory: String,
pub emoji_local_directory: String,
@ -17,7 +32,13 @@ pub struct EmojiConfig {
impl Default for Config {
fn default() -> Self {
Config { output_dir: "output".to_string() , src_dir: "md_src".to_string(), templates_dir: "templates".to_string(), emoji_config: None }
Config {
output_dir: "output".to_string(),
src_dir: "md_src".to_string(),
templates_dir: "templates".to_string(),
emoji_config: None,
syndication: None,
}
}
}
pub fn read_config() -> Config {

View file

@ -2,10 +2,7 @@ use regex::Regex;
use std::{ffi::OsString, fs::read_dir};
use crate::config;
pub fn emoji_pass(
markdown: &str,
emoji_config: &Option<config::EmojiConfig>,
) -> String {
pub fn emoji_pass(markdown: &str, emoji_config: &Option<config::EmojiConfig>) -> String {
if emoji_config.is_none() {
return markdown.to_string();
}
@ -14,13 +11,15 @@ pub fn emoji_pass(
let re_emojis = Regex::new(r":\w+:").unwrap();
for emoji in re_emojis.find_iter(&markdown.clone()) {
let emoji_file_name =
get_emoji_file_name(&get_emoji_name(emoji.as_str()), &emoji_config.as_ref().unwrap().emoji_local_directory);
let emoji_file_name = get_emoji_file_name(
&get_emoji_name(emoji.as_str()),
&emoji_config.as_ref().unwrap().emoji_local_directory,
);
if emoji_file_name.is_none() {
continue;
}
let html_string = format!(
"<img class=\"emoji\" src=\"{}/{}\"><img>",
"<img class=\"emoji\" src=\"{}/{}\"></img>",
emoji_config.as_ref().unwrap().emoji_web_directory,
emoji_file_name.unwrap().to_str().unwrap()
);

View file

@ -1,51 +1,65 @@
use derive_more::Constructor;
use serde::Serialize;
use serde_json::value::Value;
use std::collections::HashMap;
use std::{collections::HashMap, fmt::Debug};
use tera::{Context, Tera};
#[derive(Constructor, Debug, Serialize)]
pub struct BlogPost {
pub title: String,
pub human_date: String,
pub sort_date: i64, // Sort date = unix timestamp
pub content: String, // Unformatted Content of blog post can be used to display a preview or reading time estimate. Will be html when assigned but later turned into raw text
pub sort_date: i64, // Sort date = unix timestamp
pub last_updated: i64,
pub content: String,
pub output_file_name: String,
}
pub fn generate(mut blog_posts: Vec<BlogPost>, template_dir: &String) -> String {
for post in &mut blog_posts {
post.content = get_unformatted_text(post.content.clone());
}
pub fn generate(blog_posts: Vec<BlogPost>, template_dir: &String) -> String {
let mut tera = Tera::new(&format!("{}/*", template_dir)).unwrap();
tera.autoescape_on(vec![]);
tera.register_filter("truncate", truncate);
tera.register_filter("get_unformatted_content", get_unformatted_text);
tera.register_filter("remove_headers", remove_headers);
let mut context = Context::new();
context.insert("blog_posts", &blog_posts);
tera.render("index.html", &context).unwrap()
}
fn get_unformatted_text(html: String) -> String {
let frag = scraper::Html::parse_fragment(&html);
fn get_unformatted_text(html: &Value, _: &HashMap<String, Value>) -> Result<Value, tera::Error> {
let frag = scraper::Html::parse_fragment(&html.as_str().unwrap());
let mut unformatted_text = String::new();
for node in frag.tree {
if let scraper::node::Node::Text(text) = node {
unformatted_text.push_str(&text);
}
}
unformatted_text
Ok(Value::String(unformatted_text))
}
fn remove_headers(html: &Value, _: &HashMap<String, Value> ) -> Result<Value, tera::Error> {
let frag = scraper::Html::parse_fragment(html.as_str().unwrap());
let mut mut_frag = frag.clone();
let headers = vec!["h1","h2","h3","h4","h5","h6"];
for node in frag.tree.nodes() {
if let scraper::Node::Element(element) = node.value() {
if headers.contains(&element.name.local.to_string().as_str()) {
mut_frag.tree.get_mut(node.id()).unwrap().detach();
}
}
}
Ok(Value::String(mut_frag.html()))
}
fn truncate(value: &Value, args: &HashMap<String, Value>) -> Result<Value, tera::Error> {
let mut value = value.as_str().unwrap().to_string();
let new_len:usize = args.get("len").unwrap().as_str().unwrap().parse().unwrap();
value
.truncate(new_len);
let new_len: usize = args.get("len").unwrap().as_str().unwrap().parse().unwrap();
value.truncate(new_len);
Ok(Value::String(value.to_string()))
}
#[cfg(test)]
mod tests {
use crate::index::*;
@ -54,13 +68,6 @@ mod tests {
let mut args: HashMap<String, Value> = HashMap::new();
args.insert("len".to_string(), Value::String("4".to_string()));
let truncated_string = truncate(&Value::String("Meow Nya".to_string()), &args).unwrap();
assert_eq!(truncated_string.as_str().unwrap(),"Meow");
}
#[test]
fn unformat_html() {
let html_src = "<p>Meow nyaaa<em> UwU</em></p>".to_string();
let unformatted_text = get_unformatted_text(html_src);
assert_eq!(unformatted_text,"Meow nyaaa UwU");
assert_eq!(truncated_string.as_str().unwrap(), "Meow");
}
}

View file

@ -1,3 +1,4 @@
#![feature(str_split_remainder)]
use chrono::prelude::*;
use derive_more::Constructor;
use markdown::{to_html_with_options, CompileOptions, Options};
@ -12,14 +13,17 @@ pub fn read_src_files(src_dir: &str) -> Vec<SrcMD> {
let mut files: Vec<SrcMD> = Vec::new();
for file in read_dir(src_dir).expect("Cant read src dir") {
let file = file.unwrap();
let kv = get_kv(read_to_string(file.path()).unwrap()).0;
let title = kv.get("title").unwrap();
let date = kv.get("date").unwrap();
let frontmatter = get_frontmatter(read_to_string(file.path()).unwrap()).0;
let title = frontmatter
.get("title")
.expect("Error getting field: title from frontmatter");
let date = frontmatter
.get("date")
.expect("Error getting field: date from frontmatter");
files.push(SrcMD::new(
file.path(),
title.to_string(),
DateTime::parse_from_str(&format!("{date} 00:00:00 +00:00"), "%Y-%m-%d %H:%M:%S %z")
.unwrap(),
DateTime::parse_from_rfc3339(date).unwrap(),
file.path().file_stem().unwrap().to_os_string(),
))
}
@ -32,7 +36,7 @@ pub fn write_to_fs(html: String, output_dir: &String, file_name: &String) {
.unwrap_or_else(|_| panic!("Error writing {file_name}"));
}
#[derive(Constructor)]
#[derive(Constructor, Debug)]
pub struct SrcMD {
pub path: PathBuf,
pub title: String,
@ -40,14 +44,15 @@ pub struct SrcMD {
pub file_name: OsString,
}
pub fn generate_blog_entry(markdown: String, template_dir: &String) -> (String, String) {
pub fn generate_blog_entry(
markdown: String,
template_dir: &String,
frontmatter: &mut HashMap<String, String>,
) -> Option<(String, String)> {
let markdown = markdown.clone();
let mut tera = Tera::new(&format!("{template_dir}/*")).unwrap();
tera.autoescape_on(vec![]);
let (mut key_value, markdown) = get_kv(markdown);
let html_markdown = to_html_with_options(
&markdown,
&Options {
@ -61,22 +66,20 @@ pub fn generate_blog_entry(markdown: String, template_dir: &String) -> (String,
)
.unwrap();
key_value.insert("blog_content".to_string(), html_markdown.clone());
frontmatter.insert("blog_content".to_string(), html_markdown.clone());
let context = Context::from_serialize(&key_value).unwrap();
let context = Context::from_serialize(&frontmatter).unwrap();
let templated_html = tera
.render(key_value.get("template").unwrap(), &context)
.unwrap();
(templated_html, html_markdown)
let templated_html = tera.render(frontmatter.get("template")?, &context).unwrap();
Some((templated_html, html_markdown))
}
pub fn get_kv(markdown: String) -> (HashMap<String, String>, String) {
let re_key_value = Regex::new(r"(?ms)---(.*)---(?:\n)").unwrap();
pub fn get_frontmatter(markdown: String) -> (HashMap<String, String>, String) {
let re_key_value = Regex::new(r"(?msU)---(.*)---(?:\n)").unwrap();
let key_value_string = re_key_value
.find(markdown.as_str())
.expect("Can't find key value map in markdown");
.expect("Can't find frontmatter in markdown");
let content_markdown = re_key_value
.replace(markdown.clone().as_str(), "")
@ -88,6 +91,12 @@ pub fn get_kv(markdown: String) -> (HashMap<String, String>, String) {
if line == "---" {
continue;
}
let second_value = {
let mut line_iter = line.split(':');
line_iter.next();
line_iter.remainder().unwrap().trim().to_string()
};
key_value.insert(
line.split(':')
.collect::<Vec<&str>>()
@ -95,12 +104,7 @@ pub fn get_kv(markdown: String) -> (HashMap<String, String>, String) {
.unwrap()
.trim()
.to_string(),
line.split(':')
.collect::<Vec<&str>>()
.get(1)
.unwrap()
.trim()
.to_string(),
second_value,
);
}

View file

@ -2,10 +2,11 @@ use std::fs::read_to_string;
pub mod config;
pub mod emoji;
pub mod index;
pub mod syndication;
use mlem::*;
use std::env;
fn main(){
fn main() {
let args: Vec<String> = env::args().collect();
if args.len() == 1 {
@ -24,27 +25,46 @@ fn generate() {
let mut post_index: Vec<index::BlogPost> = Vec::new();
for file in raw_files {
let mut markdown = read_to_string(file.path).expect("File does not exist");
markdown = emoji::emoji_pass(
&markdown,
&config.emoji_config,
);
let markdown = read_to_string(file.path).expect("File does not exist");
let (mut frontmatter, mut markdown) = get_frontmatter(markdown);
markdown = emoji::emoji_pass(&markdown, &config.emoji_config);
let (html, index_content) = generate_blog_entry(markdown, &config.templates_dir);
let (html, index_content) =
generate_blog_entry(markdown, &config.templates_dir, &mut frontmatter)
.unwrap_or_else(|| panic!("Error generating entry {}", &file.title));
write_to_fs(
html,
&config.output_dir,
&file.file_name.clone().into_string().unwrap(),
);
let last_updated: Option<&String> = frontmatter.get("updated_at");
let updated = {
if last_updated.is_some() {
chrono::DateTime::parse_from_rfc3339(last_updated.unwrap())
.unwrap()
.timestamp()
} else {
file.date.clone().timestamp()
}
};
post_index.push(index::BlogPost::new(
file.title,
file.date.format("%Y-%m-%d").to_string(),
file.date.timestamp(),
updated,
index_content,
format!("{}.html", file.file_name.to_str().unwrap()),
));
}
if config.syndication.is_some() {
if config.syndication.as_ref().unwrap().atom.is_some() {
let atom = syndication::atom::generate(config.syndication.unwrap(), &post_index);
std::fs::write(format!("{}/atom.xml", config.output_dir), atom)
.unwrap_or_else(|_| panic!("Error writing atom feed"));
}
}
let index = index::generate(post_index, &config.templates_dir);
std::fs::write(format!("{}/index.html", config.output_dir), index)
@ -56,7 +76,7 @@ fn create_default_env() {
let config = config::Config::default();
let toml_config = toml::to_string(&config).unwrap();
std::fs::write("mlem.toml", toml_config).unwrap();
println!("Creating default directorys");
println!("Creating default directories");
std::fs::create_dir(config.output_dir).unwrap();
std::fs::create_dir(config.src_dir).unwrap();
std::fs::create_dir(config.templates_dir).unwrap();

99
src/syndication/atom.rs Normal file
View file

@ -0,0 +1,99 @@
use xml_builder::{XMLBuilder, XMLElement, XMLVersion};
use crate::{xml_tag_attribute, xml_tag_text};
/// Builds the Atom feed for the blog and returns the XML document as a string.
///
/// The feed carries the configured id, title, link, author and the optional
/// subtitle and icon, followed by one `<entry>` per post in `post_index`.
/// Each entry's link and id are `{link}{blog_root}/{output_file_name}`, and
/// its content is the post's HTML, escaped and marked `type="html"`.
///
/// The feed-level `<updated>` is derived from `last_update`. When that
/// timestamp cannot be represented (which is the case for an empty
/// `post_index`), the Unix epoch is used instead of panicking.
///
/// # Panics
///
/// Panics if the XML builder rejects an element, if a post's `last_updated`
/// timestamp is out of range, or if the generated document is not valid UTF-8.
pub fn generate(
    config: crate::config::Syndication,
    post_index: &Vec<crate::index::BlogPost>,
) -> String {
    let mut xml = XMLBuilder::new()
        .version(XMLVersion::XML1_0)
        .encoding("UTF-8".into())
        .build();

    let mut feed = XMLElement::new("feed");
    feed.add_attribute("xmlns", "http://www.w3.org/2005/Atom");

    xml_tag_text!("id", config.link.clone(), feed);
    xml_tag_text!("title", config.title, feed);
    xml_tag_attribute!("link", feed, ("href", &config.link));

    let mut author = XMLElement::new("author");
    xml_tag_text!("name", config.author, author);
    feed.add_child(author).unwrap();

    // An empty index yields an unrepresentable timestamp; fall back to the
    // default `DateTime<Utc>` (the Unix epoch) so an empty blog still builds.
    let feed_updated =
        chrono::DateTime::from_timestamp(last_update(post_index), 0).unwrap_or_default();
    xml_tag_text!("updated", feed_updated.to_rfc3339(), feed);

    if let Some(subtitle) = config.subtitle {
        xml_tag_text!("subtitle", subtitle, feed);
    }
    if let Some(icon) = config.icon {
        xml_tag_text!("icon", icon, feed);
    }

    for post in post_index {
        // The same URL serves as both the entry's link and its id.
        let entry_url = format!(
            "{}{}/{}",
            &config.link, &config.blog_root, &post.output_file_name
        );

        let mut entry = XMLElement::new("entry");
        xml_tag_text!("title", post.title.clone(), entry);
        xml_tag_attribute!("link", entry, ("href", &entry_url));
        xml_tag_text!("id", entry_url, entry);

        let entry_updated = chrono::DateTime::from_timestamp(post.last_updated, 0)
            .expect("post timestamp must be a representable date")
            .to_rfc3339();
        xml_tag_text!("updated", entry_updated, entry);

        let mut content = XMLElement::new("content");
        content.add_attribute("type", "html");
        content
            .add_text(crate::syndication::escape_html(post.content.clone()))
            .unwrap();
        entry.add_child(content).unwrap();

        feed.add_child(entry).unwrap();
    }

    xml.set_root_element(feed);

    let mut writer: Vec<u8> = Vec::new();
    xml.generate(&mut writer).unwrap();
    String::from_utf8(writer).unwrap()
}
/// Returns the most recent `last_updated` timestamp among `post_index`.
///
/// This value feeds the feed-level `<updated>` element, so it has to follow
/// the same field each entry's `<updated>` is built from; using the publish
/// date (`sort_date`) would leave the feed timestamp unchanged when an older
/// post is edited.
///
/// Returns `i64::MIN` when `post_index` is empty.
fn last_update(post_index: &[crate::index::BlogPost]) -> i64 {
    post_index
        .iter()
        .map(|post| post.last_updated)
        .max()
        .unwrap_or(i64::MIN)
}

26
src/syndication/mod.rs Normal file
View file

@ -0,0 +1,26 @@
pub mod atom;
/// Appends a child element containing only text to a parent element.
///
/// Arguments, in order: the child's tag name, the text to put inside it, and
/// the parent element the child is added to. `XMLElement` must be in scope at
/// the call site. Panics if adding the text or the child fails.
#[macro_export]
macro_rules! xml_tag_text {
    ($name:expr, $text:expr, $parent:expr) => {{
        let mut child = XMLElement::new($name);
        child.add_text($text).unwrap();
        $parent.add_child(child).unwrap();
    }};
}
/// Appends a child element that carries only attributes to a parent element.
///
/// Arguments, in order: the child's tag name, the parent element, and one or
/// more `(name, value)` tuple expressions, each added as an attribute.
/// `XMLElement` must be in scope at the call site. Panics if adding the child
/// fails.
///
/// Every tuple expression is evaluated exactly once, so arguments such as
/// `("href", &format!(..))` do not repeat their work or side effects.
#[macro_export]
macro_rules! xml_tag_attribute {
    ($name:expr, $parent:expr, $( $pair:expr ),+ ) => {{
        let mut tag = XMLElement::new($name);
        $(
            // `match` keeps temporaries inside the pair (e.g. a borrowed
            // `format!` result) alive until the attribute has been added.
            match $pair {
                (key, value) => {
                    tag.add_attribute(key, value);
                }
            }
        )+
        $parent.add_child(tag).unwrap();
    }};
}
/// Escapes `unescaped_html` with `html_escape::encode_text` so the markup can
/// be embedded as the text of an XML element, and returns the result as an
/// owned string.
fn escape_html(unescaped_html: String) -> String {
    let escaped = html_escape::encode_text(unescaped_html.as_str());
    escaped.into_owned()
}