use chrono::{DateTime, NaiveDateTime, Timelike, Utc}; use once_cell::sync::OnceCell; use regex::Regex; use std::borrow::Cow; use std::path::PathBuf; use std::result::Result as StdResult; use std::slice::IterMut; use anyhow::{anyhow, Error, Result}; use atom_syndication::{Entry as AtomEntry, Feed as AtomFeed}; use germ::ast::{Ast as GemtextAst, Node as GemtextNode}; use germ::convert::{self as germ_convert, Target}; use germ::request::{request as gemini_request, Response as GeminiResponse}; use url::Url; fn parse_gemfeed_gemtext(base_url: &Url, gemfeed: &GemtextAst) -> Vec { gemfeed .inner() .into_iter() .filter_map(|node| GemfeedEntry::from_ast(base_url, node)) .collect() } fn parse_gemfeed_atom(feed: &str) -> Result> { let feed = feed.parse::()?; let entries = feed .entries() .into_iter() .filter_map(|entry| GemfeedEntry::from_atom(entry)) .collect::>(); Ok(entries) } enum GemfeedType { Gemtext, Atom, Unknown, } impl GemfeedType { const ATOM_MIME_TYPES: &'static [&'static str] = &["text/xml", "application/atom+xml"]; } impl From> for GemfeedType { // See https://github.com/gemrest/germ/issues/2. Will be converted // to use germ Meta struct after this is fixed. fn from(mime: Cow<'_, str>) -> Self { let is_atom = Self::ATOM_MIME_TYPES .into_iter() .any(|atom_mime| mime.contains(atom_mime)); if is_atom { GemfeedType::Atom } else if mime.contains("text/gemini") { GemfeedType::Gemtext } else { GemfeedType::Unknown } } } #[derive(Debug)] pub struct Gemfeed { url: Url, entries: Vec, } #[allow(dead_code)] impl Gemfeed { pub fn new(url: &Url, entries: Vec) -> Gemfeed { Gemfeed { url: url.clone(), entries, } } pub fn load(url: &Url) -> Result { let resp = gemini_request(url)?; match GemfeedType::from(resp.meta()) { GemfeedType::Gemtext => Self::load_from_gemtext(url, resp), GemfeedType::Atom => Self::load_from_atom(url, resp), _ => Err(anyhow!( "Unrecognized Gemfeed mime type [meta={}]", resp.meta() )), } } fn load_from_atom(url: &Url, resp: GeminiResponse) -> Result { if let Some(content) = resp.content() { let entries = parse_gemfeed_atom(content)?; Ok(Self::new(url, entries)) } else { Err(anyhow!("Not a valid Atom Gemfeed")) } } fn load_from_gemtext(url: &Url, resp: GeminiResponse) -> Result { let maybe_feed = resp .content() .to_owned() .map(|text| GemtextAst::from_value(&text)); // TODO should be some actual validation of the feed here. if let Some(ref feed) = maybe_feed { let entries = parse_gemfeed_gemtext(url, feed); Ok(Self::new(url, entries)) } else { Err(anyhow!("Not a valid Gemtextg Gemfeed")) } } pub fn slugs(&self) -> Vec { self.entries() .map(|entry| entry.slug().to_owned()) .collect() } pub fn url(&self) -> &Url { &self.url } pub fn entries(&self) -> impl Iterator { self.entries.iter() } pub fn entries_mut(&mut self) -> IterMut { self.entries.iter_mut() } pub fn find_entry_by_slug>(&self, slug: S) -> Option<&GemfeedEntry> { let slug = slug.as_ref(); self.entries().find(|entry| entry.slug() == slug) } pub fn find_mut_entry_by_slug>(&mut self, slug: S) -> Option<&mut GemfeedEntry> { let slug = slug.as_ref(); self.entries_mut().find(|entry| entry.slug() == slug) } } #[allow(dead_code)] #[derive(Debug)] pub struct GemfeedEntry { title: String, slug: String, published: Option>, /// Full URL of the gemlog post. url: Url, /// Must be loaded by calling the body() method. body: OnceCell, } #[allow(dead_code)] impl GemfeedEntry { pub fn from_ast(base_url: &Url, node: &GemtextNode) -> Option { let link = GemfeedLink::try_from(node).ok()?; // Gemfeeds have only the date--lock to 12pm UTC as a guess. let publish_date = link .published .map(|date| NaiveDateTime::parse_from_str(&date, "%Y-%m-%d"))? .ok()? .with_hour(12)? .and_utc(); Some(GemfeedEntry { title: link.title, url: base_url.join(&link.path).ok()?, slug: link.slug, published: Some(publish_date), body: OnceCell::new(), }) } pub fn from_atom(entry: &AtomEntry) -> Option { let link = GemfeedLink::try_from(entry).ok()?; let publish_date = link .published .map(|date| DateTime::parse_from_rfc3339(&date))? .ok()? .to_utc(); Some(GemfeedEntry { title: link.title, url: Url::parse(&link.path).ok()?, slug: link.slug, published: Some(publish_date), body: OnceCell::new(), }) } pub fn title(&self) -> &str { &self.title } pub fn slug(&self) -> &str { &self.slug } pub fn published(&self) -> Option<&DateTime> { self.published.as_ref() } /// Full URL of the gemlog post. pub fn url(&self) -> &Url { &self.url } pub fn body(&self) -> Result<&String, Error> { self.body.get_or_try_init(|| { let resp = gemini_request(&self.url)?; Ok(resp.content().to_owned().unwrap_or_default()) }) } pub fn body_mut(&mut self) -> Result<&mut String, Error> { // Forces init and also returns the error if init failed. if let Err(error) = self.body() { return Err(error); } // Which means that this Should Be Safeā„¢ Ok(self .body .get_mut() .expect("Body not initialized when it should be")) } /// The gemtext body of the gemlog post, represented as a /// germ::Ast. The body is loaded lazily when this method is first /// called. pub fn body_as_ast(&self) -> Result { self.body().map(|text| GemtextAst::from_value(&text)) } pub fn body_as_markdown(&self) -> Result { self.body_as_ast() .map(|body| germ_convert::from_ast(&body, &Target::Markdown)) } } struct GemfeedLink { path: String, title: String, slug: String, published: Option, } impl TryFrom<&GemtextNode> for GemfeedLink { type Error = anyhow::Error; fn try_from(node: &GemtextNode) -> StdResult { let entry: Option = if let GemtextNode::Link { text: Some(title), to: path, } = node.to_owned() { let re = Regex::new(r#"(\d\d\d\d-\d\d-\d\d)"#).unwrap(); let path_buf = PathBuf::from(&path); let published: Option = re .captures_at(&title, 0) .map(|caps| caps.get(0)) .and_then(|date| date.map(|published| published.as_str().to_owned())); let stem = match published { Some(_) => path_buf.file_stem(), _ => None, }; let maybe_slug = stem.map(|s| s.to_string_lossy()); maybe_slug.map(|slug| GemfeedLink { title, path, published, slug: slug.to_string(), }) } else { None }; entry.ok_or(anyhow!("Not a Gemfeed link")) } } impl TryFrom<&AtomEntry> for GemfeedLink { type Error = anyhow::Error; fn try_from(entry: &AtomEntry) -> StdResult { let link = entry .links() .iter() .find(|link| link.rel == "alternate") .map(|link| link.href.clone()) .ok_or(anyhow!("No post link present"))?; let link_url = Url::parse(&link)?; let post_filename = link_url .path_segments() .and_then(|segments| segments.last()) .map(|filename| PathBuf::from(filename)); let maybe_slug = match post_filename { Some(ref pathbuf) => pathbuf .file_stem() .map(|stem| stem.to_string_lossy().to_string()), _ => None, }; let title = entry.title().to_string(); let published = entry.published(); if let Some(slug) = maybe_slug { Ok(GemfeedLink { path: link.clone(), published: published.map(|date| date.to_string()), title, slug, }) } else { Err(anyhow!("Slug could not be calculated: [url={}]", link_url)) } } }