From 716a93b54c1d6cfd7b7283d98f14e55c345b3154 Mon Sep 17 00:00:00 2001 From: projectmoon Date: Thu, 21 Mar 2024 13:51:48 +0100 Subject: [PATCH] Allow date format override, Use anyhow::Result to report conversion errors --- src/gemfeed.rs | 84 ++++++++++++++++++++++++++++++++++---------------- src/main.rs | 9 +++++- 2 files changed, 66 insertions(+), 27 deletions(-) diff --git a/src/gemfeed.rs b/src/gemfeed.rs index 5ee88b9..09fd366 100644 --- a/src/gemfeed.rs +++ b/src/gemfeed.rs @@ -13,24 +13,23 @@ use germ::convert::{self as germ_convert, Target}; use germ::request::{request as gemini_request, Response as GeminiResponse}; use url::Url; -fn parse_gemfeed_gemtext(base_url: &Url, gemfeed: &GemtextAst) -> Vec { +use crate::Cli; + +fn parse_gemfeed_gemtext(base_url: &Url, gemfeed: &GemtextAst) -> Result> { gemfeed .inner() .into_iter() - .filter_map(|node| GemfeedEntry::from_ast(base_url, node)) + .map(|node| GemfeedEntry::from_ast(base_url, node)) .collect() } -fn parse_gemfeed_atom(feed: &str) -> Result> { +fn parse_gemfeed_atom(feed: &str, settings: &GemfeedParserSettings) -> Result> { let feed = feed.parse::()?; - let entries = feed - .entries() + feed.entries() .into_iter() - .filter_map(|entry| GemfeedEntry::from_atom(entry)) - .collect::>(); - - Ok(entries) + .map(|entry| GemfeedEntry::from_atom(entry, &settings.atom_date_format)) + .collect() } enum GemfeedType { @@ -67,6 +66,30 @@ pub struct Gemfeed { entries: Vec, } +/// Settings for controlling how the Gemfeed is parsed. +pub struct GemfeedParserSettings<'a> { + atom_date_format: &'a str, +} + +impl<'a> From<&'a Cli> for GemfeedParserSettings<'a> { + fn from(cli: &'a Cli) -> Self { + cli.date_format + .as_deref() + .map(|date_fmt| GemfeedParserSettings { + atom_date_format: date_fmt, + }) + .unwrap_or(Self::default()) + } +} + +impl Default for GemfeedParserSettings<'_> { + fn default() -> Self { + GemfeedParserSettings { + atom_date_format: "%Y-%m-%d %H:%M:%S %:z", + } + } +} + #[allow(dead_code)] impl Gemfeed { pub fn new(url: &Url, entries: Vec) -> Gemfeed { @@ -77,10 +100,14 @@ impl Gemfeed { } pub fn load(url: &Url) -> Result { + Self::load_with_settings(url, &GemfeedParserSettings::default()) + } + + pub fn load_with_settings(url: &Url, settings: &GemfeedParserSettings) -> Result { let resp = gemini_request(url)?; match GemfeedType::from(resp.meta()) { GemfeedType::Gemtext => Self::load_from_gemtext(url, resp), - GemfeedType::Atom => Self::load_from_atom(url, resp), + GemfeedType::Atom => Self::load_from_atom(url, resp, &settings), _ => Err(anyhow!( "Unrecognized Gemfeed mime type [meta={}]", resp.meta() @@ -88,9 +115,13 @@ impl Gemfeed { } } - fn load_from_atom(url: &Url, resp: GeminiResponse) -> Result { + fn load_from_atom( + url: &Url, + resp: GeminiResponse, + settings: &GemfeedParserSettings, + ) -> Result { if let Some(content) = resp.content() { - let entries = parse_gemfeed_atom(content)?; + let entries = parse_gemfeed_atom(content, settings)?; Ok(Self::new(url, entries)) } else { Err(anyhow!("Not a valid Atom Gemfeed")) @@ -105,7 +136,7 @@ impl Gemfeed { // TODO should be some actual validation of the feed here. if let Some(ref feed) = maybe_feed { - let entries = parse_gemfeed_gemtext(url, feed); + let entries = parse_gemfeed_gemtext(url, feed)?; Ok(Self::new(url, entries)) } else { Err(anyhow!("Not a valid Gemtextg Gemfeed")) @@ -157,37 +188,38 @@ pub struct GemfeedEntry { #[allow(dead_code)] impl GemfeedEntry { - pub fn from_ast(base_url: &Url, node: &GemtextNode) -> Option { - let link = GemfeedLink::try_from(node).ok()?; + pub fn from_ast(base_url: &Url, node: &GemtextNode) -> Result { + let link = GemfeedLink::try_from(node)?; // Gemfeeds have only the date--lock to 12pm UTC as a guess. let publish_date = link .published - .map(|date| NaiveDateTime::parse_from_str(&date, "%Y-%m-%d"))? - .ok()? - .with_hour(12)? + .map(|date| NaiveDateTime::parse_from_str(&date, "%Y-%m-%d")) + .ok_or(anyhow!("No publish date found"))?? + .with_hour(12) + .unwrap() .and_utc(); - Some(GemfeedEntry { + Ok(GemfeedEntry { title: link.title, - url: base_url.join(&link.path).ok()?, + url: base_url.join(&link.path)?, slug: link.slug, published: Some(publish_date), body: OnceCell::new(), }) } - pub fn from_atom(entry: &AtomEntry) -> Option { - let link = GemfeedLink::try_from(entry).ok()?; + pub fn from_atom(entry: &AtomEntry, date_format: &str) -> Result { + let link = GemfeedLink::try_from(entry)?; let publish_date = link .published - .map(|date| DateTime::parse_from_str(&date, "%Y-%m-%d %H:%M:%S %:z"))? - .ok()? + .ok_or(anyhow!("No publish date found")) + .map(|date| DateTime::parse_from_str(&date, date_format))?? .to_utc(); - Some(GemfeedEntry { + Ok(GemfeedEntry { title: link.title, - url: Url::parse(&link.path).ok()?, + url: Url::parse(&link.path)?, slug: link.slug, published: Some(publish_date), body: OnceCell::new(), diff --git a/src/main.rs b/src/main.rs index cd50699..e980183 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,6 @@ use crate::{gemfeed::Gemfeed, wf::WriteFreelyCredentials}; use clap::{Parser, Subcommand}; +use gemfeed::GemfeedParserSettings; use std::collections::HashSet; use url::Url; @@ -21,6 +22,10 @@ struct Cli { #[arg(short = 'a', long, value_name = "ALIAS")] wf_alias: Option, + /// Optional date format override for parsing Gemlog Atom publish dates. + #[arg(long, value_name = "FMT")] + date_format: Option, + #[command(subcommand)] command: Option, } @@ -101,15 +106,17 @@ async fn sync( .as_deref() .expect("WriteFreely access token required"); + let settings = GemfeedParserSettings::from(cli); let gemfeed_url = Url::parse(gemlog_url)?; let wf_url = Url::parse(wf_url)?; let wf_creds = WriteFreelyCredentials::AccessToken(wf_token); let wf_alias = cli.wf_alias.as_deref().expect("WriteFreely Alias required"); let wf_client = wf::WriteFreely::new(&wf_url, wf_alias, &wf_creds).await?; - let mut gemfeed = Gemfeed::load(&gemfeed_url)?; + let mut gemfeed = Gemfeed::load_with_settings(&gemfeed_url, &settings)?; sync_gemlog(&config, &mut gemfeed, &wf_client).await?; + Ok(()) }