From 28004d3c0bd1a6bcb1ba7239aa22dc6f0f7c7144 Mon Sep 17 00:00:00 2001 From: projectmoon Date: Thu, 30 Nov 2023 12:03:51 +0100 Subject: [PATCH] support misskey blocks --- src/fediblockhole/__init__.py | 277 ++++++++++++++++++++++++++++++---- 1 file changed, 246 insertions(+), 31 deletions(-) diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index c97816f..4d5f9d4 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -5,6 +5,7 @@ import argparse import toml import csv import requests +import re import json import time import os.path @@ -148,7 +149,7 @@ def fetch_from_urls(url_sources: dict, blocklists.append(bl) if save_intermediate: save_intermediate_blocklist(bl, savedir, export_fields) - + return blocklists def fetch_from_instances(sources: dict, @@ -245,7 +246,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max', if save_block_audit_file: blockdata:BlockAudit = { 'domain': domain, - 'count': domain_matches_count, + 'count': domain_matches_count, 'percent': domain_matches_percent, } audit.blocks[domain] = blockdata @@ -258,7 +259,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max', def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str='max') -> dict: """Use a mergeplan to decide how to merge two overlapping block definitions - + @param oldblock: The existing block definition. @param newblock: The new block definition we want to merge in. @param mergeplan: How to merge. Choices are 'max', the default, and 'min'. @@ -276,7 +277,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str except KeyError: log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...") continue - + # How do we override an earlier block definition? 
if mergeplan in ['max', None]: # Use the highest block level found (the default) @@ -285,7 +286,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str if newblock.severity > oldblock.severity: # log.debug(f"New block severity is higher. Using that.") blockdata['severity'] = newblock.severity - + # For 'reject_media', 'reject_reports', and 'obfuscate' if # the value is set and is True for the domain in # any blocklist then the value is set to True. @@ -345,7 +346,7 @@ def merge_comments(oldcomment:str, newcomment:str) -> str: # "boring, lack of moderation, nazis, scrapers" old_tokens = oldcomment.split(', ') new_tokens = newcomment.split(', ') - + # Remove any empty string tokens that we get while '' in old_tokens: old_tokens.remove('') @@ -374,7 +375,141 @@ def requests_headers(token: str=None): return headers +def get_nodeinfo(server, token: str=None, host_meta_fallback = False): + headers = requests_headers(token) + url = f'https://{server}/.well-known/nodeinfo' + try: + resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT) + except Exception as ex: + log.error(f"Error getting host node info for {server}. 
Exception: {ex}") + return None + + # if well-known nodeinfo isn't found, try to check host-meta for a webfinger URL + # needed on servers where the display domain is different than the web domain + if resp.status_code != 200 and not host_meta_fallback: + # not found, try to check host-meta as a fallback + log.warning(f'nodeinfo for {server} not found, checking host-meta') + new_server = get_server_from_host_meta(server) + if new_server is not None: + if new_server == server: + log.debug(f'host-meta for {server} did not get a new server.') + return None + else: + return get_nodeinfo(new_server, token, True) + else: + return None + + if resp.status_code == 200: + nodeLoc = None + try: + nodeInfo = resp.json() + for link in nodeInfo['links']: + if link['rel'] in [ + 'http://nodeinfo.diaspora.software/ns/schema/2.0', + 'http://nodeinfo.diaspora.software/ns/schema/2.1', + ]: + nodeLoc = link['href'] + break + except Exception as ex: + log.error(f'error getting server {server} info from well-known node info. Exception: {ex}') + return None + else: + log.error(f'Error getting well-known host node info for {server}. Status Code: {resp.status_code}') + return None + + if nodeLoc is None: + log.warning(f'could not find link to node info in well-known nodeinfo of {server}') + return None + + # regrab server from nodeLoc, again in the case of different display and web domains + match = re.match( + r"https://(?P<server>[^/]+)/", nodeLoc + ) + if match is None: + log.error(f"Error getting web server name from {server}.") + return None + + server = match.group('server') + + try: + resp = requests.get(nodeLoc, headers=headers, timeout=REQUEST_TIMEOUT) + except Exception as ex: + log.error(f"Error getting host node info for {server}. 
Exception: {ex}") + return None + + if resp.status_code == 200: + try: + nodeInfo = resp.json() + if 'activitypub' not in nodeInfo['protocols']: + log.warning(f'server {server} does not support activitypub, skipping') + return None + return { + 'webserver': server, + 'software': nodeInfo['software']['name'], + 'version': nodeInfo['software']['version'], + 'rawnodeinfo': nodeInfo, + } + except Exception as ex: + log.error(f'error getting server {server} info from nodeinfo. Exception: {ex}') + return None + else: + log.error(f'Error getting host node info for {server}. Status Code: {resp.status_code}') + return None + +def set_server_apis(server): + # support for new server software should be added here + software_apis = { + 'mastodonApiSupport': ['mastodon', 'pleroma', 'akkoma', 'pixelfed', 'hometown', 'iceshrimp'], + 'misskeyApiSupport': ['misskey', 'calckey', 'firefish', 'foundkey', 'sharkey'], + 'lemmyApiSupport': ['lemmy'] + } + + for api, softwareList in software_apis.items(): + server[api] = server['software'] in softwareList + + # search `features` list in metadata if available + if 'metadata' in server['rawnodeinfo'] and 'features' in server['rawnodeinfo']['metadata'] and isinstance(server['rawnodeinfo']['metadata']['features'], list): + features = server['rawnodeinfo']['metadata']['features'] + if 'mastodon_api' in features: + server['mastodonApiSupport'] = True + +def fetch_server_info(server, token: str=None): + nodeinfo = get_nodeinfo(server, token=token) + if nodeinfo is None: + return None + else: + set_server_apis(nodeinfo) + return nodeinfo + +def fetch_api_type(host: str, token: str=None) -> str: + server_info = fetch_server_info(host, token) + # Many Misskey servers also support Mastodon API to varying + # extents, so we should check Misskey API support before checking + # Mastodon API support. 
+ if server_info and server_info['misskeyApiSupport']: + return "misskey" + elif server_info and server_info['mastodonApiSupport']: + return "mastodon" + else: + sys.exit(f"Unknown API type for {host}") + + def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False, + import_fields: list=['domain', 'severity'], + scheme: str='https') -> list[DomainBlock]: + """ + Fetch existing block from server (Misskey or Mastodon) + """ + api_type = fetch_api_type(host, token) + + if api_type == 'mastodon': + return fetch_instance_blocklist_mastodon(host, token, admin, import_fields, scheme) + elif api_type == 'misskey': + return fetch_instance_blocklist_misskey(host, token, admin, import_fields, scheme) + else: + sys.exit(f"Unknown API type for server {host}") + +def fetch_instance_blocklist_mastodon(host: str, token: str=None, admin: bool=False, import_fields: list=['domain', 'severity'], scheme: str='https') -> list[DomainBlock]: """Fetch existing block list from server @@ -385,7 +520,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False, @param import_fields: A list of fields to import from the remote instance. @returns: A list of the domain blocks from the instance. """ - log.info(f"Fetching instance blocklist from {host} ...") + log.info(f"Fetching instance blocklist (Mastodon) from {host} ...") if admin: api_path = "/api/v1/admin/domain_blocks" @@ -424,7 +559,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False, else: next = pagination[0] # prev = pagination[1] - + urlstring, rel = next.split('; ') url = urlstring.strip('<').rstrip('>') @@ -432,6 +567,54 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False, return blocklist +def fetch_instance_blocklist_misskey(host: str, token: str=None, admin: bool=False, + import_fields: list=['domain', 'severity'], + scheme: str='https') -> list[DomainBlock]: + """Fetch existing block list from server + + @param host: The remote host to connect to. 
+ @param token: The (required) OAuth Bearer token to authenticate with. + @param admin: Boolean flag to use the admin API if True. + @param import_fields: A list of fields to import from the remote instance. + @returns: A list of the domain blocks from the instance. + """ + log.info(f"Fetching instance blocklist (Misskey) from {host} ...") + + if admin: + api_path = "/api/admin/meta" + parse_format = 'json' + else: + sys.exit("Must be an admin to use the meta API") + + headers = requests_headers(token) + + url = f"{scheme}://{host}{api_path}" + + blockdata = [] + + response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json={}) + if response.status_code != 200: + log.error(f"{response.status_code} Cannot fetch remote blocklist: {response.content}") + raise ValueError(f"Unable to fetch domain block list: {response}") + + # Convert the Misskey blocks (which are just domains) into + # something similar to Mastodon response that Fediblockhole + # can understand. + def map_block(domain, severity): + return { "domain": domain, "digest": "", "severity": severity, "comment": "" } + + meta = json.loads(response.content.decode('utf-8')) + + blocked_hosts = [ map_block(domain, "suspend") for domain in meta['blockedHosts'] ] + silenced_hosts = [ map_block(domain, "silence") for domain in meta['silencedHosts'] ] + + blockdata.extend(blocked_hosts) + blockdata.extend(silenced_hosts) + + blocklist = parse_blocklist(blockdata, url, parse_format, import_fields) + + return blocklist + def delete_block(token: str, host: str, id: int, scheme: str='https'): """Remove a domain block""" log.debug(f"Removing domain block {id} at {host}...") @@ -552,9 +735,9 @@ def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str='https' log.warning(err['error']) elif response.status_code != 200: - + raise ValueError(f"Something went wrong: {response.status_code}: {response.content}") - + def push_blocklist(token: str, host: str, blocklist: list[DomainBlock], dryrun: 
bool=False, import_fields: list=['domain', 'severity'], @@ -563,7 +746,7 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock], override_private_comment: str=None ): """Push a blocklist to a remote instance. - + Updates existing entries if they exist, creates new blocks if they don't. @param token: The Bearer token for OAUTH API authentication @@ -579,11 +762,10 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock], serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme) # # Convert serverblocks to a dictionary keyed by domain name - # knownblocks = {row.domain: row for row in serverblocks} - for newblock in blocklist.values(): - log.debug(f"Processing block: {newblock}") + + # Already existing block if newblock.domain in serverblocks: log.debug(f"Block already exists for {newblock.domain}, checking for differences...") @@ -612,19 +794,12 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock], blockdata = oldblock.copy() blockdata.update(newblock) log.debug(f"Block as dict: {blockdata._asdict()}") - - if not dryrun: - update_known_block(token, host, blockdata, scheme) - # add a pause here so we don't melt the instance - time.sleep(API_CALL_DELAY) - else: - log.info("Dry run selected. Not applying changes.") - else: log.debug("No differences detected. 
Not updating.") pass else: + # New block # stamp this record with a private comment, since we're the ones adding it if override_private_comment: newblock.private_comment = override_private_comment @@ -636,16 +811,56 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock], # Make sure the new block doesn't clobber a domain with followers newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity, scheme) - if not dryrun: - add_block(token, host, newblock, scheme) - # add a pause here so we don't melt the instance - time.sleep(API_CALL_DELAY) - else: - log.info("Dry run selected. Not adding block.") + + # Append existing blocks to the new blocks (needed because Misskey + # is all at once update operation) + for existing_block in serverblocks.values(): + if existing_block.domain not in blocklist: + blocklist.blocks[existing_block.domain] = existing_block + if not dryrun: + apply_blocks_misskey(blocklist, host, scheme, token) + +def apply_blocks_misskey(blocklist: Blocklist, host:str, scheme: str="https", token: str=None): + # instead of being cool and trying to update everything, split + # into lists of suspends and silences, then call update meta with + # these new lists. 
+ suspended_hosts = [] + silenced_hosts = [] + + suspend_level = BlockSeverity("suspend") + silence_level = BlockSeverity("silence") + + for block in blocklist.values(): + if block.severity == suspend_level: + suspended_hosts.append(block.domain) + elif block.severity == silence_level: + silenced_hosts.append(block.domain) + + # Make one big call to update-meta with new suspends and silences + api_path = "/api/admin/update-meta" + parse_format = 'json' + + headers = requests_headers(token) + + url = f"{scheme}://{host}{api_path}" + + update_data = { + "blockedHosts": suspended_hosts, + "silencedHosts": silenced_hosts + } + + response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json=update_data) + if response.status_code not in (200, 204): + log.error(f"{response.status_code} Unable to apply blocklist: {response.content}") + raise ValueError(f"Unable to apply block list: {response}") + + log.info(f"Updated meta with {len(suspended_hosts)} blocks and {len(silenced_hosts)} silences") + return True + def load_config(configfile: str): """Augment commandline arguments with config file parameters - + Config file is expected to be in TOML format """ conf = toml.load(configfile) @@ -723,7 +938,7 @@ def save_domain_block_audit_to_file( def augment_args(args, tomldata: str=None): """Augment commandline arguments with config file parameters - + If tomldata is provided, uses that data instead of loading from a config file. """ @@ -749,7 +964,7 @@ def augment_args(args, tomldata: str=None): if not args.override_private_comment: args.override_private_comment = conf.get('override_private_comment', None) - + if not args.savedir: args.savedir = conf.get('savedir', '/tmp')