Initial commit.

This commit is contained in:
projectmoon 2024-03-21 11:11:48 +01:00
commit c4d1a7a2a2
8 changed files with 2530 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

1829
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

19
Cargo.toml Normal file
View File

@ -0,0 +1,19 @@
[package]
name = "gemfed"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.81"
atom_syndication = "0.12.2"
chrono = "0.4.35"
clap = { version = "4.5.3", features = ["derive"] }
gemini-feed = "0.1.0"
germ = "0.3"
once_cell = "1.19.0"
regex = "1.10.3"
tokio = {version = "1.36", features = [ "full" ] }
url = "2.5.0"
writefreely_client = "0.2.0"

327
src/gemfeed.rs Normal file
View File

@ -0,0 +1,327 @@
use chrono::{DateTime, NaiveDateTime, Timelike, Utc};
use once_cell::sync::OnceCell;
use regex::Regex;
use std::borrow::Cow;
use std::path::PathBuf;
use std::result::Result as StdResult;
use std::slice::IterMut;
use anyhow::{anyhow, Error, Result};
use atom_syndication::{Entry as AtomEntry, Feed as AtomFeed};
use germ::ast::{Ast as GemtextAst, Node as GemtextNode};
use germ::convert::{self as germ_convert, Target};
use germ::request::{request as gemini_request, Response as GeminiResponse};
use url::Url;
/// Walks every node of a parsed gemtext document and keeps the ones that
/// `GemfeedEntry::from_ast` accepts. Nodes it rejects are skipped.
///
/// `base_url` is handed to `GemfeedEntry::from_ast` for each node.
fn parse_gemfeed_gemtext(base_url: &Url, gemfeed: &GemtextAst) -> Vec<GemfeedEntry> {
    let mut entries = Vec::new();
    for node in gemfeed.inner() {
        if let Some(entry) = GemfeedEntry::from_ast(base_url, node) {
            entries.push(entry);
        }
    }
    entries
}
/// Parses `feed` as an Atom document and converts its entries.
///
/// Entries that `GemfeedEntry::from_atom` rejects are skipped.
///
/// # Errors
/// Returns an error when `feed` cannot be parsed as an Atom feed.
fn parse_gemfeed_atom(feed: &str) -> Result<Vec<GemfeedEntry>> {
    let parsed: AtomFeed = feed.parse()?;
    Ok(parsed
        .entries()
        .iter()
        .filter_map(GemfeedEntry::from_atom)
        .collect())
}
/// Kind of feed document, derived from the meta text of a Gemini response.
enum GemfeedType {
/// The meta text contains `text/gemini`.
Gemtext,
/// The meta text contains one of `GemfeedType::ATOM_MIME_TYPES`.
Atom,
/// Anything else; `Gemfeed::load` reports this as an error.
Unknown,
}
impl GemfeedType {
/// Substrings that mark a response meta as an Atom feed. They are checked
/// with `contains` in the `From<Cow<str>>` conversion.
const ATOM_MIME_TYPES: &'static [&'static str] = &["text/xml", "application/atom+xml"];
}
impl From<Cow<'_, str>> for GemfeedType {
    // See https://github.com/gemrest/germ/issues/2. Will be converted
    // to use germ Meta struct after this is fixed.

    /// Classifies a response meta string by substring search: Atom MIME
    /// types are checked first, then `text/gemini`; everything else is
    /// `Unknown`.
    fn from(mime: Cow<'_, str>) -> Self {
        let mime: &str = &mime;

        if Self::ATOM_MIME_TYPES
            .iter()
            .any(|atom_mime| mime.contains(*atom_mime))
        {
            return GemfeedType::Atom;
        }

        if mime.contains("text/gemini") {
            return GemfeedType::Gemtext;
        }

        GemfeedType::Unknown
    }
}
/// A gemlog feed: the URL it was addressed by plus the entries parsed from it.
#[derive(Debug)]
pub struct Gemfeed {
/// URL of the feed; `Gemfeed::new` stores a clone of its argument.
url: Url,
/// Entries belonging to this feed.
entries: Vec<GemfeedEntry>,
}
#[allow(dead_code)]
impl Gemfeed {
    /// Builds a feed from an already-parsed list of entries. `url` is cloned.
    pub fn new(url: &Url, entries: Vec<GemfeedEntry>) -> Gemfeed {
        Gemfeed {
            url: url.clone(),
            entries,
        }
    }

    /// Requests `url` over Gemini and parses the response as either a
    /// gemtext Gemfeed or an Atom feed, depending on the response meta.
    ///
    /// # Errors
    /// Fails when the request fails, when the meta matches neither feed
    /// type, or when the response carries no content.
    pub fn load(url: &Url) -> Result<Gemfeed> {
        let resp = gemini_request(url)?;
        let feed_type = GemfeedType::from(resp.meta());
        // Exhaustive on purpose: adding a feed type must be handled here.
        match feed_type {
            GemfeedType::Gemtext => Self::load_from_gemtext(url, resp),
            GemfeedType::Atom => Self::load_from_atom(url, resp),
            GemfeedType::Unknown => Err(anyhow!(
                "Unrecognized Gemfeed mime type [meta={}]",
                resp.meta()
            )),
        }
    }

    /// Parses the response content as an Atom feed.
    fn load_from_atom(url: &Url, resp: GeminiResponse) -> Result<Gemfeed> {
        match resp.content() {
            Some(content) => Ok(Self::new(url, parse_gemfeed_atom(content)?)),
            None => Err(anyhow!("Not a valid Atom Gemfeed")),
        }
    }

    /// Parses the response content as gemtext and keeps the links that
    /// look like Gemfeed entries.
    fn load_from_gemtext(url: &Url, resp: GeminiResponse) -> Result<Gemfeed> {
        // TODO should be some actual validation of the feed here.
        match resp.content() {
            // Borrow the body instead of cloning it just to parse it.
            Some(text) => {
                let feed = GemtextAst::from_value(text);
                Ok(Self::new(url, parse_gemfeed_gemtext(url, &feed)))
            }
            None => Err(anyhow!("Not a valid Gemtext Gemfeed")),
        }
    }

    /// Owned copies of the slugs of all entries, in entry order.
    pub fn slugs(&self) -> Vec<String> {
        self.entries()
            .map(|entry| entry.slug().to_owned())
            .collect()
    }

    /// URL this feed was created with.
    pub fn url(&self) -> &Url {
        &self.url
    }

    /// Iterates over the entries by shared reference.
    pub fn entries(&self) -> impl Iterator<Item = &GemfeedEntry> {
        self.entries.iter()
    }

    /// Iterates over the entries by mutable reference.
    pub fn entries_mut(&mut self) -> IterMut<'_, GemfeedEntry> {
        self.entries.iter_mut()
    }

    /// First entry whose slug equals `slug`, if any.
    pub fn find_entry_by_slug<S: AsRef<str>>(&self, slug: S) -> Option<&GemfeedEntry> {
        let slug = slug.as_ref();
        self.entries().find(|entry| entry.slug() == slug)
    }

    /// Mutable variant of [`Gemfeed::find_entry_by_slug`].
    pub fn find_mut_entry_by_slug<S: AsRef<str>>(&mut self, slug: S) -> Option<&mut GemfeedEntry> {
        let slug = slug.as_ref();
        self.entries_mut().find(|entry| entry.slug() == slug)
    }
}
/// One gemlog post listed in a feed. The post body is not part of the feed
/// and is fetched on demand.
#[allow(dead_code)]
#[derive(Debug)]
pub struct GemfeedEntry {
/// Title taken from the feed link or Atom entry.
title: String,
/// File stem of the post's path; used to match posts between systems.
slug: String,
/// Publication time, when one is known.
published: Option<DateTime<Utc>>,
/// Full URL of the gemlog post.
url: Url,
/// Must be loaded by calling the body() method.
body: OnceCell<String>,
}
#[allow(dead_code)]
impl GemfeedEntry {
pub fn from_ast(base_url: &Url, node: &GemtextNode) -> Option<GemfeedEntry> {
let link = GemfeedLink::try_from(node).ok()?;
// Gemfeeds have only the date--lock to 12pm UTC as a guess.
let publish_date = link
.published
.map(|date| NaiveDateTime::parse_from_str(&date, "%Y-%m-%d"))?
.ok()?
.with_hour(12)?
.and_utc();
Some(GemfeedEntry {
title: link.title,
url: base_url.join(&link.path).ok()?,
slug: link.slug,
published: Some(publish_date),
body: OnceCell::new(),
})
}
pub fn from_atom(entry: &AtomEntry) -> Option<GemfeedEntry> {
let link = GemfeedLink::try_from(entry).ok()?;
let publish_date = link
.published
.map(|date| DateTime::parse_from_rfc3339(&date))?
.ok()?
.to_utc();
Some(GemfeedEntry {
title: link.title,
url: Url::parse(&link.path).ok()?,
slug: link.slug,
published: Some(publish_date),
body: OnceCell::new(),
})
}
pub fn title(&self) -> &str {
&self.title
}
pub fn slug(&self) -> &str {
&self.slug
}
pub fn published(&self) -> Option<&DateTime<Utc>> {
self.published.as_ref()
}
/// Full URL of the gemlog post.
pub fn url(&self) -> &Url {
&self.url
}
pub fn body(&self) -> Result<&String, Error> {
self.body.get_or_try_init(|| {
let resp = gemini_request(&self.url)?;
Ok(resp.content().to_owned().unwrap_or_default())
})
}
pub fn body_mut(&mut self) -> Result<&mut String, Error> {
// Forces init and also returns the error if init failed.
if let Err(error) = self.body() {
return Err(error);
}
// Which means that this Should Be Safe™
Ok(self
.body
.get_mut()
.expect("Body not initialized when it should be"))
}
/// The gemtext body of the gemlog post, represented as a
/// germ::Ast. The body is loaded lazily when this method is first
/// called.
pub fn body_as_ast(&self) -> Result<GemtextAst, Error> {
self.body().map(|text| GemtextAst::from_value(&text))
}
pub fn body_as_markdown(&self) -> Result<String, Error> {
self.body_as_ast()
.map(|body| germ_convert::from_ast(&body, &Target::Markdown))
}
}
/// Intermediate, string-only view of a feed item, produced from either a
/// gemtext link node or an Atom entry before it becomes a `GemfeedEntry`.
struct GemfeedLink {
/// Link target: the gemtext link's `to` value, or the Atom link's `href`.
path: String,
/// Link text (gemtext) or entry title (Atom).
title: String,
/// File stem of the link target.
slug: String,
/// Publication date as text, when one was found.
published: Option<String>,
}
impl TryFrom<&GemtextNode> for GemfeedLink {
type Error = anyhow::Error;
fn try_from(node: &GemtextNode) -> StdResult<Self, Self::Error> {
let entry: Option<GemfeedLink> = if let GemtextNode::Link {
text: Some(title),
to: path,
} = node.to_owned()
{
let re = Regex::new(r#"(\d\d\d\d-\d\d-\d\d)"#).unwrap();
let path_buf = PathBuf::from(&path);
let published: Option<String> = re
.captures_at(&title, 0)
.map(|caps| caps.get(0))
.and_then(|date| date.map(|published| published.as_str().to_owned()));
let stem = match published {
Some(_) => path_buf.file_stem(),
_ => None,
};
let maybe_slug = stem.map(|s| s.to_string_lossy());
maybe_slug.map(|slug| GemfeedLink {
title,
path,
published,
slug: slug.to_string(),
})
} else {
None
};
entry.ok_or(anyhow!("Not a Gemfeed link"))
}
}
impl TryFrom<&AtomEntry> for GemfeedLink {
type Error = anyhow::Error;
fn try_from(entry: &AtomEntry) -> StdResult<Self, Self::Error> {
let link = entry
.links()
.iter()
.find(|link| link.rel == "alternate")
.map(|link| link.href.clone())
.ok_or(anyhow!("No post link present"))?;
let link_url = Url::parse(&link)?;
let post_filename = link_url
.path_segments()
.and_then(|segments| segments.last())
.map(|filename| PathBuf::from(filename));
let maybe_slug = match post_filename {
Some(ref pathbuf) => pathbuf
.file_stem()
.map(|stem| stem.to_string_lossy().to_string()),
_ => None,
};
let title = entry.title().to_string();
let published = entry.published();
if let Some(slug) = maybe_slug {
Ok(GemfeedLink {
path: link.clone(),
published: published.map(|date| date.to_string()),
title,
slug,
})
} else {
Err(anyhow!("Slug could not be calculated: [url={}]", link_url))
}
}
}

213
src/main.rs Normal file
View File

@ -0,0 +1,213 @@
use crate::{gemfeed::Gemfeed, wf::WriteFreelyCredentials};
use clap::{Parser, Subcommand};
use std::collections::HashSet;
use url::Url;
use anyhow::Result;
use wf::WriteFreely;
mod gemfeed;
mod sanitization;
mod wf;
// Top-level command line arguments. The token and alias are declared here,
// next to the subcommand field rather than inside it; `sync` and the logout
// path in `main` read them from this struct.
// NOTE: plain `//` comments are used here on purpose. The `///` comments
// below are picked up by clap's derive as help text, so editing them changes
// program output.
#[derive(Parser, Debug)]
#[command(version, about, long_about = None)]
struct Cli {
/// WriteFreely access token. Required for sync and logout.
#[arg(short = 't', long, value_name = "TOKEN")]
wf_access_token: Option<String>,
/// WriteFreely blog name/alias. Usually the same as username.
#[arg(short = 'a', long, value_name = "ALIAS")]
wf_alias: Option<String>,
// `None` when no subcommand was given; `main` then does nothing.
#[command(subcommand)]
command: Option<Command>,
}
// Subcommands of the CLI. The `///` comments are user-facing help text
// (clap's derive reads them), so they are kept short and spelled carefully.
#[derive(Subcommand, Debug)]
enum Command {
    /// Logs in to WriteFreely and prints an access token.
    Login {
        /// Root URL of WriteFreely instance.
        #[arg(long, value_name = "URL")]
        wf_url: String,
        /// WriteFreely username.
        #[arg(short, long)]
        username: String,
        /// WriteFreely password.
        #[arg(short, long)]
        password: String,
    },
    /// Logs out from WriteFreely.
    Logout {
        /// Root URL of WriteFreely instance.
        #[arg(long, value_name = "URL")]
        wf_url: String,
    },
    /// Synchronize Gemlog posts from Gemini to WriteFreely.
    Sync {
        /// Full gemini:// URL of Gemlog (Atom feed or Gemfeed).
        #[arg(long, value_name = "URL")]
        gemlog_url: String,
        /// Root URL of WriteFreely instance.
        #[arg(long, value_name = "URL")]
        wf_url: String,
        /// Optional sanitization rule: Remove all text BEFORE this
        /// marker in the Gemlog post.
        #[arg(long)]
        strip_before_marker: Option<String>,
        /// Optional sanitization rule: Remove all text AFTER this
        /// marker in the Gemlog post.
        #[arg(long)]
        strip_after_marker: Option<String>,
    },
}
/// Borrowed view of the optional sanitization markers; `main` fills it from
/// the `Sync` subcommand's options.
struct SanitizeConfig<'a> {
/// When set, text before this marker is stripped from each post body.
strip_before_marker: &'a Option<String>,
/// When set, text after this marker is stripped from each post body.
strip_after_marker: &'a Option<String>,
}
/// Applies the configured strip rules to every entry of `gemfeed`,
/// "before" first and then "after". A rule whose marker is `None` is
/// skipped.
///
/// # Errors
/// Stops at, and returns, the first error from a strip function.
fn sanitize_gemlogs(gemfeed: &mut Gemfeed, config: &SanitizeConfig) -> Result<()> {
    let before = config.strip_before_marker.as_deref();
    let after = config.strip_after_marker.as_deref();

    for entry in gemfeed.entries_mut() {
        if let Some(marker) = before {
            sanitization::strip_before(entry, marker)?;
        }
        if let Some(marker) = after {
            sanitization::strip_after(entry, marker)?;
        }
    }

    Ok(())
}
async fn sync(
cli: &Cli,
gemlog_url: &str,
wf_url: &str,
config: &SanitizeConfig<'_>,
) -> Result<()> {
let wf_token = cli
.wf_access_token
.as_deref()
.expect("WriteFreely access token required");
let gemfeed_url = Url::parse(gemlog_url)?;
let wf_url = Url::parse(wf_url)?;
let wf_creds = WriteFreelyCredentials::AccessToken(wf_token);
let wf_alias = cli.wf_alias.as_deref().expect("WriteFreely Alias required");
let wf_client = wf::WriteFreely::new(&wf_url, wf_alias, &wf_creds).await?;
let mut gemfeed = Gemfeed::load(&gemfeed_url)?;
sync_gemlog(&config, &mut gemfeed, &wf_client).await?;
Ok(())
}
/// Creates a WriteFreely post for every feed entry whose slug is not yet
/// present on the server, printing progress to stdout.
///
/// NOTE(review): `sanitize_gemlogs` is run over the whole feed, not only
/// over the entries about to be posted.
///
/// # Errors
/// Stops at the first failing WriteFreely call or sanitization step.
async fn sync_gemlog(
    config: &SanitizeConfig<'_>,
    gemfeed: &mut Gemfeed,
    wf: &WriteFreely,
) -> Result<()> {
    let user = wf.user().await?;
    println!("Beginning sync of posts for WriteFreely user: {}", user);

    // Slugs in the feed that the server does not have yet.
    let remote_slugs: HashSet<String> = wf.slugs().await?.into_iter().collect();
    let local_slugs: HashSet<String> = gemfeed.slugs().into_iter().collect();
    let pending: Vec<&String> = local_slugs.difference(&remote_slugs).collect();

    sanitize_gemlogs(gemfeed, config)?;

    let mut synced = 0;
    for slug in pending {
        if let Some(entry) = gemfeed.find_entry_by_slug(slug) {
            let post = wf.create_post(entry).await?;
            synced += 1;
            println!(
                "Created post: {} [title={}]",
                post.id,
                post.title.unwrap_or_default()
            );
        }
    }

    println!("Post synchronization complete [posts synced={}]", synced);
    Ok(())
}
async fn wf_login(wf_url: &str, username: &str, password: &str) -> Result<()> {
let wf_url = Url::parse(wf_url)?;
let creds = WriteFreelyCredentials::UsernameAndPassword(username, password);
let wf_client = wf::WriteFreely::new(&wf_url, &username, &creds).await?;
println!(
"{}",
wf_client.access_token().unwrap_or("[No Token Returned]")
);
Ok(())
}
async fn wf_logout(wf_url: &str, wf_alias: &str, access_token: &str) -> Result<()> {
let wf_url = Url::parse(wf_url)?;
let creds = WriteFreelyCredentials::AccessToken(access_token);
let wf_client = wf::WriteFreely::new(&wf_url, &wf_alias, &creds).await?;
wf_client.logout().await?;
println!("Successfully logged out from {}", wf_url);
Ok(())
}
#[tokio::main]
async fn main() -> Result<()> {
let cli = Cli::parse();
if let Some(ref cmd) = cli.command {
match cmd {
Command::Login {
ref wf_url,
ref username,
ref password,
} => wf_login(wf_url, username, password).await,
Command::Logout { ref wf_url } => {
wf_logout(
wf_url,
&cli.wf_alias.as_deref().expect("WriteFreely alias required"),
&cli.wf_access_token.expect("Access token required"),
)
.await
}
Command::Sync {
wf_url,
gemlog_url,
strip_before_marker,
strip_after_marker,
} => {
let sanitize_cfg = SanitizeConfig {
strip_before_marker,
strip_after_marker,
};
sync(&cli, gemlog_url, wf_url, &sanitize_cfg).await
}
}
} else {
Ok(())
}
}

25
src/sanitization.rs Normal file
View File

@ -0,0 +1,25 @@
use crate::gemfeed::GemfeedEntry;
use anyhow::Result;
/// Removes everything up to and including the first occurrence of
/// `marker` from the entry's body. The body is left unchanged when the
/// marker does not occur.
///
/// # Errors
/// Fails when the body cannot be loaded.
pub fn strip_before(entry: &mut GemfeedEntry, marker: &str) -> Result<()> {
    let body = entry.body_mut()?;
    if let Some(start) = body.find(marker) {
        // Keep only the text that follows the marker.
        let tail = body.split_off(start + marker.len());
        *body = tail;
    }
    Ok(())
}
/// Removes the last occurrence of `marker` and everything after it from
/// the entry's body. The body is left unchanged when the marker does not
/// occur.
///
/// # Errors
/// Fails when the body cannot be loaded.
pub fn strip_after(entry: &mut GemfeedEntry, marker: &str) -> Result<()> {
    let body = entry.body_mut()?;
    if let Some(cut) = body.rfind(marker) {
        body.truncate(cut);
    }
    Ok(())
}

100
src/wf.rs Normal file
View File

@ -0,0 +1,100 @@
use anyhow::Result;
use std::result::Result as StdResult;
use url::Url;
use writefreely_client::{
post::{Post, PostCreateRequest},
Client, Timestamp,
};
use crate::gemfeed::GemfeedEntry;
/// Wrapper struct for managing the WriteFreely connection.
pub struct WriteFreely {
/// Underlying `writefreely_client` client, built in `WriteFreely::new`.
client: Client,
/// Alias passed to `collections().posts(..)` when listing and creating posts.
alias: String,
}
/// How `WriteFreely::new` should authenticate.
pub enum WriteFreelyCredentials<'a> {
/// Username and password, in that order; passed to `Client::login`.
UsernameAndPassword(&'a str, &'a str),
/// Existing access token; passed to `Client::with_token`.
AccessToken(&'a str),
}
#[allow(dead_code)]
impl WriteFreely {
/// Attempts to create and log in to the WriteFreely server.
pub async fn new(
url: &Url,
alias: &str,
creds: &WriteFreelyCredentials<'_>,
) -> Result<WriteFreely> {
use WriteFreelyCredentials::*;
let client = match creds {
UsernameAndPassword(user, pw) => Client::new(url)?.login(user, pw).await?,
AccessToken(token) => Client::new(url)?.with_token(token),
};
Ok(WriteFreely {
client,
alias: alias.to_owned(),
})
}
pub async fn user(&self) -> Result<String> {
Ok(self.client.get_authenticated_user().await?)
}
pub fn access_token(&self) -> Option<&str> {
self.client.access_token.as_deref()
}
/// Logs the client out and renders this instance of the wrapper
/// unusable.n
pub async fn logout(mut self) -> Result<()> {
self.client.logout().await?;
Ok(())
}
/// Get the slugs on the server for the alias/user.
pub async fn slugs(&self) -> Result<Vec<String>> {
let posts = self.client.collections().posts(&self.alias).list().await?;
let slugs: Vec<_> = posts
.into_iter()
.flat_map(|post| post.slug)
.map(|slug| slug.to_string())
.collect();
Ok(slugs)
}
pub async fn create_post(&self, entry: &GemfeedEntry) -> Result<Post> {
let blog = self.client.collections().posts(&self.alias);
let post = blog.create(entry.try_into()?).await?;
Ok(post)
}
}
impl TryFrom<GemfeedEntry> for PostCreateRequest {
    type Error = anyhow::Error;

    /// Owned-value convenience: delegates to the by-reference conversion.
    fn try_from(entry: GemfeedEntry) -> StdResult<Self, Self::Error> {
        <Self as TryFrom<&GemfeedEntry>>::try_from(&entry)
    }
}
impl TryFrom<&GemfeedEntry> for PostCreateRequest {
    type Error = anyhow::Error;

    /// Builds a post creation request from the entry's slug, title and
    /// Markdown body, setting the creation time when the entry has a
    /// published date.
    ///
    /// # Errors
    /// Fails when the entry body cannot be loaded.
    fn try_from(entry: &GemfeedEntry) -> StdResult<Self, Self::Error> {
        let request = PostCreateRequest::new()
            .slug(entry.slug().into())
            .title(entry.title())
            .body(entry.body_as_markdown()?);

        Ok(match entry.published() {
            Some(date) => request.created(Timestamp::from(*date)),
            None => request,
        })
    }
}

16
src/writefreely.rs Normal file
View File

@ -0,0 +1,16 @@
use anyhow::{anyhow, Error, Result};
use writefreely_client::{
post::{PostCreateRequest, Slug},
Client,
};
/// Lists the posts of `alias` through `client` and returns their slugs as
/// strings.
pub async fn slugs_on_writefreely(client: &Client, alias: &str) -> Result<Vec<String>> {
    let posts = client.collections().posts(alias).list().await?;
    Ok(posts
        .into_iter()
        .flat_map(|post| post.slug.into_iter().map(|slug| slug.to_string()))
        .collect())
}