Allow date format override; use anyhow::Result to report conversion errors

This commit is contained in:
projectmoon 2024-03-21 13:51:48 +01:00
parent 20f85d7e72
commit 716a93b54c
2 changed files with 66 additions and 27 deletions

View File

@ -13,24 +13,23 @@ use germ::convert::{self as germ_convert, Target};
use germ::request::{request as gemini_request, Response as GeminiResponse}; use germ::request::{request as gemini_request, Response as GeminiResponse};
use url::Url; use url::Url;
fn parse_gemfeed_gemtext(base_url: &Url, gemfeed: &GemtextAst) -> Vec<GemfeedEntry> { use crate::Cli;
fn parse_gemfeed_gemtext(base_url: &Url, gemfeed: &GemtextAst) -> Result<Vec<GemfeedEntry>> {
gemfeed gemfeed
.inner() .inner()
.into_iter() .into_iter()
.filter_map(|node| GemfeedEntry::from_ast(base_url, node)) .map(|node| GemfeedEntry::from_ast(base_url, node))
.collect() .collect()
} }
fn parse_gemfeed_atom(feed: &str) -> Result<Vec<GemfeedEntry>> { fn parse_gemfeed_atom(feed: &str, settings: &GemfeedParserSettings) -> Result<Vec<GemfeedEntry>> {
let feed = feed.parse::<AtomFeed>()?; let feed = feed.parse::<AtomFeed>()?;
let entries = feed feed.entries()
.entries()
.into_iter() .into_iter()
.filter_map(|entry| GemfeedEntry::from_atom(entry)) .map(|entry| GemfeedEntry::from_atom(entry, &settings.atom_date_format))
.collect::<Vec<_>>(); .collect()
Ok(entries)
} }
enum GemfeedType { enum GemfeedType {
@ -67,6 +66,30 @@ pub struct Gemfeed {
entries: Vec<GemfeedEntry>, entries: Vec<GemfeedEntry>,
} }
/// Settings for controlling how the Gemfeed is parsed.
///
/// The settings only borrow their data (for example from the parsed command
/// line arguments), so they are cheap to copy and pass around by value or by
/// reference.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct GemfeedParserSettings<'a> {
    /// `strftime`-style format string used to parse the publish date of Atom
    /// feed entries.
    atom_date_format: &'a str,
}
impl<'a> From<&'a Cli> for GemfeedParserSettings<'a> {
    /// Builds parser settings from the parsed command line arguments.
    ///
    /// When the user supplied a date format override it is borrowed from
    /// `cli` and used as the Atom date format; otherwise the settings fall
    /// back to [`Default::default`].
    fn from(cli: &'a Cli) -> Self {
        cli.date_format
            .as_deref()
            .map(|atom_date_format| GemfeedParserSettings { atom_date_format })
            // Only construct the default settings when no override was given.
            .unwrap_or_default()
    }
}
impl Default for GemfeedParserSettings<'_> {
fn default() -> Self {
GemfeedParserSettings {
atom_date_format: "%Y-%m-%d %H:%M:%S %:z",
}
}
}
#[allow(dead_code)] #[allow(dead_code)]
impl Gemfeed { impl Gemfeed {
pub fn new(url: &Url, entries: Vec<GemfeedEntry>) -> Gemfeed { pub fn new(url: &Url, entries: Vec<GemfeedEntry>) -> Gemfeed {
@ -77,10 +100,14 @@ impl Gemfeed {
} }
pub fn load(url: &Url) -> Result<Gemfeed> { pub fn load(url: &Url) -> Result<Gemfeed> {
Self::load_with_settings(url, &GemfeedParserSettings::default())
}
pub fn load_with_settings(url: &Url, settings: &GemfeedParserSettings) -> Result<Gemfeed> {
let resp = gemini_request(url)?; let resp = gemini_request(url)?;
match GemfeedType::from(resp.meta()) { match GemfeedType::from(resp.meta()) {
GemfeedType::Gemtext => Self::load_from_gemtext(url, resp), GemfeedType::Gemtext => Self::load_from_gemtext(url, resp),
GemfeedType::Atom => Self::load_from_atom(url, resp), GemfeedType::Atom => Self::load_from_atom(url, resp, &settings),
_ => Err(anyhow!( _ => Err(anyhow!(
"Unrecognized Gemfeed mime type [meta={}]", "Unrecognized Gemfeed mime type [meta={}]",
resp.meta() resp.meta()
@ -88,9 +115,13 @@ impl Gemfeed {
} }
} }
fn load_from_atom(url: &Url, resp: GeminiResponse) -> Result<Gemfeed> { fn load_from_atom(
url: &Url,
resp: GeminiResponse,
settings: &GemfeedParserSettings,
) -> Result<Gemfeed> {
if let Some(content) = resp.content() { if let Some(content) = resp.content() {
let entries = parse_gemfeed_atom(content)?; let entries = parse_gemfeed_atom(content, settings)?;
Ok(Self::new(url, entries)) Ok(Self::new(url, entries))
} else { } else {
Err(anyhow!("Not a valid Atom Gemfeed")) Err(anyhow!("Not a valid Atom Gemfeed"))
@ -105,7 +136,7 @@ impl Gemfeed {
// TODO should be some actual validation of the feed here. // TODO should be some actual validation of the feed here.
if let Some(ref feed) = maybe_feed { if let Some(ref feed) = maybe_feed {
let entries = parse_gemfeed_gemtext(url, feed); let entries = parse_gemfeed_gemtext(url, feed)?;
Ok(Self::new(url, entries)) Ok(Self::new(url, entries))
} else { } else {
Err(anyhow!("Not a valid Gemtextg Gemfeed")) Err(anyhow!("Not a valid Gemtextg Gemfeed"))
@ -157,37 +188,38 @@ pub struct GemfeedEntry {
#[allow(dead_code)] #[allow(dead_code)]
impl GemfeedEntry { impl GemfeedEntry {
pub fn from_ast(base_url: &Url, node: &GemtextNode) -> Option<GemfeedEntry> { pub fn from_ast(base_url: &Url, node: &GemtextNode) -> Result<GemfeedEntry> {
let link = GemfeedLink::try_from(node).ok()?; let link = GemfeedLink::try_from(node)?;
// Gemfeeds have only the date--lock to 12pm UTC as a guess. // Gemfeeds have only the date--lock to 12pm UTC as a guess.
let publish_date = link let publish_date = link
.published .published
.map(|date| NaiveDateTime::parse_from_str(&date, "%Y-%m-%d"))? .map(|date| NaiveDateTime::parse_from_str(&date, "%Y-%m-%d"))
.ok()? .ok_or(anyhow!("No publish date found"))??
.with_hour(12)? .with_hour(12)
.unwrap()
.and_utc(); .and_utc();
Some(GemfeedEntry { Ok(GemfeedEntry {
title: link.title, title: link.title,
url: base_url.join(&link.path).ok()?, url: base_url.join(&link.path)?,
slug: link.slug, slug: link.slug,
published: Some(publish_date), published: Some(publish_date),
body: OnceCell::new(), body: OnceCell::new(),
}) })
} }
pub fn from_atom(entry: &AtomEntry) -> Option<GemfeedEntry> { pub fn from_atom(entry: &AtomEntry, date_format: &str) -> Result<GemfeedEntry> {
let link = GemfeedLink::try_from(entry).ok()?; let link = GemfeedLink::try_from(entry)?;
let publish_date = link let publish_date = link
.published .published
.map(|date| DateTime::parse_from_str(&date, "%Y-%m-%d %H:%M:%S %:z"))? .ok_or(anyhow!("No publish date found"))
.ok()? .map(|date| DateTime::parse_from_str(&date, date_format))??
.to_utc(); .to_utc();
Some(GemfeedEntry { Ok(GemfeedEntry {
title: link.title, title: link.title,
url: Url::parse(&link.path).ok()?, url: Url::parse(&link.path)?,
slug: link.slug, slug: link.slug,
published: Some(publish_date), published: Some(publish_date),
body: OnceCell::new(), body: OnceCell::new(),

View File

@ -1,5 +1,6 @@
use crate::{gemfeed::Gemfeed, wf::WriteFreelyCredentials}; use crate::{gemfeed::Gemfeed, wf::WriteFreelyCredentials};
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use gemfeed::GemfeedParserSettings;
use std::collections::HashSet; use std::collections::HashSet;
use url::Url; use url::Url;
@ -21,6 +22,10 @@ struct Cli {
#[arg(short = 'a', long, value_name = "ALIAS")] #[arg(short = 'a', long, value_name = "ALIAS")]
wf_alias: Option<String>, wf_alias: Option<String>,
/// Optional date format override for parsing Gemlog Atom publish dates.
#[arg(long, value_name = "FMT")]
date_format: Option<String>,
#[command(subcommand)] #[command(subcommand)]
command: Option<Command>, command: Option<Command>,
} }
@ -101,15 +106,17 @@ async fn sync(
.as_deref() .as_deref()
.expect("WriteFreely access token required"); .expect("WriteFreely access token required");
let settings = GemfeedParserSettings::from(cli);
let gemfeed_url = Url::parse(gemlog_url)?; let gemfeed_url = Url::parse(gemlog_url)?;
let wf_url = Url::parse(wf_url)?; let wf_url = Url::parse(wf_url)?;
let wf_creds = WriteFreelyCredentials::AccessToken(wf_token); let wf_creds = WriteFreelyCredentials::AccessToken(wf_token);
let wf_alias = cli.wf_alias.as_deref().expect("WriteFreely Alias required"); let wf_alias = cli.wf_alias.as_deref().expect("WriteFreely Alias required");
let wf_client = wf::WriteFreely::new(&wf_url, wf_alias, &wf_creds).await?; let wf_client = wf::WriteFreely::new(&wf_url, wf_alias, &wf_creds).await?;
let mut gemfeed = Gemfeed::load(&gemfeed_url)?;
let mut gemfeed = Gemfeed::load_with_settings(&gemfeed_url, &settings)?;
sync_gemlog(&config, &mut gemfeed, &wf_client).await?; sync_gemlog(&config, &mut gemfeed, &wf_client).await?;
Ok(()) Ok(())
} }