open-webui-filters/gemini.py

259 lines
9.6 KiB
Python
Raw Normal View History

2024-11-30 11:41:13 +00:00
"""
title: Gemini Protocol Tool
author: projectmoon
author_url: https://git.agnos.is/projectmoon/open-webui-filters
2024-12-13 22:08:51 +00:00
version: 0.2.2
2024-11-30 11:41:13 +00:00
license: AGPL-3.0+
required_open_webui_version: 0.4.3
requirements: ignition-gemini
"""
import re
import ignition
2024-11-30 19:54:08 +00:00
from ignition import RedirectResponse, SuccessResponse
2024-11-30 11:41:13 +00:00
from pydantic import BaseModel, Field
2024-11-30 19:54:08 +00:00
from typing import Optional
2024-11-30 11:41:13 +00:00
2024-12-13 16:07:46 +00:00
# From https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python
def remove_emojis(text):
    """Return ``text`` with every run of emoji characters deleted.

    Only code points inside the four ranges listed below count as emoji;
    all other characters are passed through unchanged.
    """
    emoji_ranges = (
        "\U0001F600-\U0001F64F"  # emoticons
        "\U0001F300-\U0001F5FF"  # symbols & pictographs
        "\U0001F680-\U0001F6FF"  # transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    )
    # One character class, matched greedily so adjacent emoji vanish together.
    return re.sub(f"[{emoji_ranges}]+", "", text, flags=re.UNICODE)
def extract_title(content: str, fallback: str="") -> Optional[str]:
    """Derive a display title from the first heading line of ``content``.

    The first line that (after trimming whitespace) starts with ``#`` is
    taken as the heading. Its leading ``#`` markers and any emoji are
    removed and the result is trimmed, so ``"# 🐂 My Page"`` yields
    ``"My Page"`` rather than a title with stray leading spaces.

    :param content: The text to scan, one candidate heading per line.
    :param fallback: Value returned when no heading exists, or when the
        heading is empty once markers and emoji are removed.
    :return: The cleaned heading text, or ``fallback``.
    """
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line.startswith("#"):
            continue
        # Trim after emoji removal too: dropping an emoji can leave
        # whitespace behind at either end of the title.
        title = remove_emojis(line.lstrip("#")).strip()
        return title if title else fallback
    return fallback
def result_instructions(url: str, redirect: bool=False) -> str:
    """Build the prompt preamble announcing a successful Gemini fetch.

    :param url: The URL that was fetched; interpolated into the preamble.
    :param redirect: Whether the fetch involved a redirect. Accepted for
        interface compatibility; it does not currently alter the text.
    :return: A heading, a success line naming ``url``, and the content
        handling instructions, terminated by a blank line.
    """
    # Every fragment except the last ends with a space: adjacent string
    # literals are joined verbatim, so a missing space fuses two sentences.
    content_instructions = (
        "Report the content to the user and answer their question. "
        "Do not make assumptions. This is the content you need. "
        "Even if you think it's not the right content, report the "
        "content anyway. Do not say you think it's the wrong content. "
        "You MUST use this content."
    )
    return (
        "# Gemini Content Fetch Result\n"
        f"Content was successfully fetched for the URL: {url}\n"
        f"{content_instructions}\n\n"
    )
def instructions(url: str, redirect: bool=False) -> str:
    """Build the full prompt that precedes fetched Gemtext content.

    The text is the fetch-result preamble from ``result_instructions``
    followed by rules telling the model how to render Gemini link lines
    as Markdown links.

    :param url: The fetched URL, forwarded to ``result_instructions``.
    :param redirect: Forwarded unchanged to ``result_instructions``.
    :return: The combined instruction text.
    """
    preamble = result_instructions(url, redirect)
    link_rules = "".join([
        "Here are more instructions you must follow. ",
        "Render all Gemini links as Markdown links. Examples:\n\n",
        " - [gemini://example.com](gemini://example.com)\n",
        " - [a gemini capsule](gemini://example.com)\n",
        " - [my personal capsule](gemini://example.com/personal)\n\n",
        "A Gemini link starts with => on the line, and then the URL follows:\n\n",
        " - `=> /page A Gemini Page`: A relative link to `/page` titled `A Gemini Page`\n",
        " - `=> gemini://example.com/place` An absolute link with no URL title \n\n",
        "When rendering relative links, always make them absolute links.\n",
        "If the link has a title, render the link title verbatim:\n",
        " - `=> gemini://example.com My Page` becomes `[My Page](gemini://example.com)`\n",
        " - `=> gemini://example.com` becomes `[gemini://example.com](gemini://example.com)`.\n",
        " - `=> gemini://example.com 🐂 My Page` becomes `[🐂 My Page](gemini://example.com)`\n\n",
        "The Gemtext content is below in the code block.",
    ])
    return preamble + link_rules
2024-11-30 11:41:13 +00:00
2024-12-13 16:07:46 +00:00
class GeminiFetcher:
    """Fetch one Gemini URL, following redirects and emitting UI events.

    Progress, error and citation events are sent through the optional
    ``event_emitter`` coroutine; when it is not set, all event methods
    are no-ops.
    """

    # A redirect chain longer than this is reported as a failure.
    MAX_REDIRECTS = 5

    # Prefixes rewritten to a plain ``gemini://`` scheme by ``correct_url``.
    # Doubly-schemed forms come first so both schemes are removed at once.
    _CORRECTABLE_PREFIXES = (
        "gemini://http://",
        "gemini://https://",
        "https://",
        "http://",
    )

    def __init__(self, gemini_url: str, correct_urls: bool=False, web_proxy: str="", event_emitter=None):
        """
        :param gemini_url: The URL originally requested.
        :param correct_urls: Whether to repair malformed URLs before the
            first request.
        :param web_proxy: Optional link template containing ``<URL>``,
            used to build the clickable citation link.
        :param event_emitter: Optional coroutine function that receives
            event dictionaries.
        """
        self.original_url = gemini_url
        # Set to the redirect target once a redirect has been received.
        self.current_url = None
        self.correct_urls = correct_urls
        self.web_proxy = web_proxy
        self.event_emitter = event_emitter

    async def fetch_event(self, done: bool):
        """Emit a status event saying the fetch is in progress or complete."""
        if not self.event_emitter:
            return

        description = (f"Fetched: {self.original_url}" if done
                       else f"Fetching: {self.original_url}")
        await self.event_emitter({
            "type": "status",
            "data": {
                "status": "complete" if done else "in_progress",
                "description": description,
                "done": done,
            },
        })

    async def error_event(self, message: str="There was an error"):
        """Emit a terminal status event carrying an error ``message``."""
        if not self.event_emitter:
            return

        await self.event_emitter({
            "type": "status",
            "data": {
                "status": "error",
                "description": message,
                "done": True,
            },
        })

    async def create_citation(self, title: str, content: str):
        """Emit a source event so the fetched page appears as a citation.

        The citation link is the originally requested URL, substituted
        into ``web_proxy`` in place of ``<URL>`` when a proxy is set.
        """
        if not self.event_emitter:
            return

        link = (self.web_proxy.replace("<URL>", self.original_url)
                if self.web_proxy else self.original_url)
        await self.event_emitter({
            "type": "source",
            "data": {
                "document": [content],
                "metadata": [{"source": title, "html": False}],
                "source": {"name": title, "url": link},
            },
        })

    async def complete_event(self, content: str):
        """Emit the completion status followed by the citation for ``content``."""
        title = extract_title(content, self.original_url)
        await self.fetch_event(done=True)
        await self.create_citation(title, content)

    def correct_url(self, url: str) -> str:
        """Rewrite a web-style or scheme-less URL into a ``gemini://`` URL.

        ``http://``, ``https://`` and doubly-schemed forms such as
        ``gemini://https://`` have their scheme replaced by ``gemini://``.
        A URL with no recognised scheme gets ``gemini://`` prepended.
        A URL consisting of nothing but one of those prefixes is returned
        unchanged, since there is no host to keep.
        """
        for prefix in self._CORRECTABLE_PREFIXES:
            if url.startswith(prefix):
                remainder = url[len(prefix):]
                return f"gemini://{remainder}" if remainder else url

        if not url.startswith("gemini://"):
            return f"gemini://{url}"
        return url

    async def fetch(self, prev_url: Optional[str]=None, redirects: int=0) -> dict:
        """Request the current URL and return the outcome as a dictionary.

        :param prev_url: The URL that redirected here, or ``None`` for the
            initial request. It is passed to the client as the referer.
        :param redirects: Number of redirects already followed.
        :return: A dict with ``success`` (bool), ``content`` (the page
            text, or a message describing the failure) and ``redirected``
            (whether at least one redirect was followed).
        """
        await self.fetch_event(done=False)
        gemini_url = (self.current_url
                      if self.current_url is not None
                      else self.original_url)
        redirected = prev_url is not None

        if redirects > self.MAX_REDIRECTS:
            message = f"Too many redirects (ended at {gemini_url})"
            # Close the "in_progress" status opened above.
            await self.error_event(message)
            return {"success": False, "content": message, "redirected": redirected}

        # Only the user-supplied URL is corrected; redirect targets come
        # from the server and are used as given.
        if self.correct_urls and not prev_url:
            corrected_url = self.correct_url(gemini_url)
            if corrected_url != gemini_url:
                print(f"[Gemini] URL '{gemini_url}' corrected to '{corrected_url}'")
                gemini_url = corrected_url

        if not prev_url:
            print(f"[Gemini] Fetching: {gemini_url}")
        else:
            print(f"[Gemini] Fetching: {gemini_url} (redirected from {prev_url})")

        try:
            # NOTE(review): this call appears to be synchronous, so it
            # would block the event loop while the request runs - confirm.
            response = ignition.request(gemini_url, raise_errors=True, referer=prev_url)
            if isinstance(response, SuccessResponse):
                content = self.handle_content(response)
                await self.complete_event(content)
                return {"success": True, "content": content, "redirected": redirected}

            if isinstance(response, RedirectResponse):
                self.current_url = response.data()
                # Hand over the URL just requested as the previous URL.
                # self.current_url was still None on the first hop, which
                # hid the redirect from the next call.
                return await self.fetch(gemini_url, redirects + 1)

            await self.error_event(f"Unsupported status code {response.status}")
            print(f"[Gemini] Unsupported {response.status} code for '{gemini_url}'")
            message = (f"Tell the user there was a {response.status} status code. "
                       f"Support for handling {response.status} is not implemented yet.")
            return {"success": False, "content": message, "redirected": redirected}
        except Exception as e:
            # Tool boundary: any failure is reported back as text instead
            # of propagating to the caller.
            print(f"[Gemini] error: {e}")
            message = f"Tell the user there was an error fetching the page: {e}"
            await self.error_event(str(e))
            return {"success": False, "content": message, "redirected": redirected}

    def handle_content(self, resp: "SuccessResponse") -> str:
        """Return the body of a successful response if it is Gemtext.

        Only the media type in ``resp.meta`` is inspected; any number of
        ``;``-separated parameters after it (charset, lang, ...) are
        ignored. The comparison is case-insensitive.

        :raises ValueError: If the media type is not ``text/gemini``.
        """
        mime_type = resp.meta.split(";", 1)[0].strip().lower()
        if mime_type == "text/gemini":
            return resp.data().strip()
        raise ValueError(f"Not yet able to handle MIME type {mime_type}")
2024-11-30 11:41:13 +00:00
class Tools:
    """Tool entry point exposing Gemini page fetching."""

    class Valves(BaseModel):
        # Settings read by get_gemini_page and handed to GeminiFetcher.
        attempt_url_correction: bool = Field(
            default=True,
            description="Attempt to correct malformed URLs (default enabled).",
        )
        gemini_web_proxy: str = Field(
            default="",
            description="Web proxy to use for clickable citation links.",
        )

    def __init__(self):
        self.valves = self.Valves()
        self.user_valves = None

    # NOTE(review): the docstring below is presumably consumed by the host
    # application as the tool description - keep its wording stable.
    async def get_gemini_page(self, gemini_url: str, __event_emitter__) -> str:
        """
        Fetches Gemini capsules, content, and web pages over Gemini Protocol.
        Use this if the user requests a gemini:// URL.
        :param gemini_url: The URL to fetch. The URL MUST begin with gemini://.
        :return: The fetched data as Markdown.
        """
        settings = self.valves
        fetcher = GeminiFetcher(
            gemini_url=gemini_url,
            correct_urls=settings.attempt_url_correction,
            web_proxy=settings.gemini_web_proxy,
            event_emitter=__event_emitter__,
        )
        outcome = await fetcher.fetch()

        # On failure the content is already a message for the model.
        if not outcome["success"]:
            return outcome["content"]

        preamble = instructions(gemini_url, redirect=outcome["redirected"])
        return f"{preamble}\n\n```\n{outcome['content']}\n```"