diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9f194d8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,91 @@
+# FediBlockHole
+
+A tool for keeping a Mastodon instance blocklist synchronised with remote lists.
+
+## Features
+
+ - Import and export block lists from CSV files.
+ - Read a block list from a remote instance (if a token is configured)
+
+## Installing
+
+Instance admins who want to use this tool will need to add an Application at
+`https://<instance-domain>/settings/applications/` that they can authorise with an
+OAuth token. For each instance you connect to, add this token to the config file.
+
+### Reading remote instance blocklists
+
+To read admin blocks from a remote instance, you'll need to ask the instance
+admin to add a new Application at `https://<instance-domain>/settings/applications/`
+and then tell you the access token.
+
+The application needs the `admin:read:domain_blocks` OAuth scope, but unfortunately this
+scope isn't available in the current application screen (v4.0.2 of Mastodon at
+time of writing). There is a way to do it with scopes, but it's really
+dangerous, so I'm not going to tell you what it is here.
+
+A better way is to ask the instance admin to connect to the PostgreSQL database
+and add the scope there, like this:
+
+```
+UPDATE oauth_access_tokens
+    SET scopes='admin:read:domain_blocks'
+    WHERE token='<your_app_token>';
+```
+
+When that's done, FediBlockHole should be able to use its token to authorise
+adding or updating domain blocks via the API.
+
+### Writing instance blocklists
+
+To write domain blocks into an instance requires both the
+`admin:read:domain_blocks` and `admin:write:domain_blocks` OAuth scopes. The
+`read` scope is used to read the current list of domain blocks so we update ones
+that already exist, rather than trying to add all new ones and clutter up the
+instance.
+
+Again, there's no way to do this (yet) on the application admin
+screen so we need to ask our destination admins to update the application
+permissions similar to reading domain blocks:
+
+```
+UPDATE oauth_access_tokens
+    SET scopes='admin:read:domain_blocks admin:write:domain_blocks'
+    WHERE token='<your_app_token>';
+```
+
+When that's done, FediBlockHole should be able to use its token to authorise
+adding or updating domain blocks via the API.
+
+## Configuring
+
+Once you have your applications and tokens and scopes set up, create a
+configuration file for FediBlockHole to use. You can put it anywhere and use the
+`-c <configfile>` commandline parameter to tell FediBlockHole where it is.
+
+Or you can use the default location of `/etc/default/fediblockhole.conf.toml`.
+
+As the filename suggests, FediBlockHole uses TOML syntax.
+
+There are 2 key sections:
+
+ 1. `blocklist_instance_sources`: A list of instances to read blocklists from
+ 1. `blocklist_instance_destinations`: A list of instances to write blocklists to
+
+Each is a list of dictionaries of the form:
+```
+{ domain = '<domain>', token = '<BearerToken>' }
+```
+
+The `domain` is the fully-qualified domain name of the API host for an instance
+you want to read or write domain blocks to/from. The `BearerToken` is the OAuth
+token for the application that's configured in the instance to allow you to
+read/write domain blocks, as discussed above.
+
+## Using the tool
+
+Once you've configured the tool, run it like this:
+
+```
+fediblock_sync.py -c <configfile>
+```
+
+If you put the config file in `/etc/default/fediblockhole.conf.toml` you don't
+need to pass the config file path.
\ No newline at end of file
diff --git a/bin/fediblock_sync.py b/bin/fediblock_sync.py
new file mode 100755
index 0000000..b8f565a
--- /dev/null
+++ b/bin/fediblock_sync.py
@@ -0,0 +1,330 @@
+#!/usr/bin/python3
+# Export and import blocklists via API
+
+import argparse
+import toml
+import csv
+import requests
+import json
+import time
+
+import logging
+logging.basicConfig(level=logging.DEBUG,
+    format='%(asctime)s %(levelname)s %(message)s')
+import pprint
+
+log = logging.getLogger('fediblock_sync')
+
+CONFIGFILE = "/home/mastodon/etc/admin.conf"
+
+def sync_blocklists(conf: dict):
+    """Sync instance blocklists from remote sources.
+
+    @param conf: A config dict with 'blocklist_instance_sources' and
+        'blocklist_instance_destinations' lists, each containing
+        {domain, token} dicts.
+    """
+    # Build a dict of blocklists we retrieve from remote sources.
+    # We will merge these later using a merge algorithm we choose.
+    blocklists = {}
+
+    # Fetch blocklists from remote instances
+    for blocklist_src in conf['blocklist_instance_sources']:
+        domain = blocklist_src['domain']
+        token = blocklist_src['token']
+        blocklists[domain] = fetch_instance_blocklist(token, domain)
+
+    # Merge blocklists into an update dict
+    merged = merge_blocklists(blocklists)
+
+    # Push the blocklist to destination instances
+    for dest in conf['blocklist_instance_destinations']:
+        domain = dest['domain']
+        token = dest['token']
+        push_blocklist(token, domain, merged.values())
+
+def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
+    """Merge fetched remote blocklists into a bulk update
+
+    Remote blocklists may have conflicting overlaps. Earlier entries are
+    merged with later ones according to the mergeplan.
+
+    @param blocklists: A dict of lists of block definitions, keyed by source.
+    @param mergeplan: An optional method of merging overlapping block definitions
+        'max' (the default) uses the highest severity block found
+        'min' uses the lowest severity block found
+    @returns: A dict of merged block definitions, keyed by domain.
+    @raises NotImplementedError: If mergeplan is not a known plan.
+    """
+    merged = {}
+
+    for key, blist in blocklists.items():
+        log.debug(f"Adding blocks from {key} ...")
+        for blockdef in blist:
+            domain = blockdef['domain']
+            if domain in merged:
+                blockdata = merged[domain]
+
+                # If the public or private comment is different,
+                # append it to the existing comment, joined with a newline.
+                # Use .get() so a source that omits a comment doesn't crash us.
+                if blockdef.get('public_comment', '') != blockdata['public_comment'] and blockdata['public_comment'] != '':
+                    blockdata['public_comment'] = '\n'.join([blockdef.get('public_comment', ''), blockdata['public_comment']])
+
+                if blockdef.get('private_comment', '') != blockdata['private_comment'] and blockdata['private_comment'] != '':
+                    blockdata['private_comment'] = '\n'.join([blockdef.get('private_comment', ''), blockdata['private_comment']])
+
+                # How do we override an earlier block definition?
+                if mergeplan in ['max', None]:
+                    # Use the highest block level found (the default)
+                    if blockdef['severity'] == 'suspend':
+                        blockdata['severity'] = 'suspend'
+
+                    if blockdef['reject_media'] == True:
+                        blockdata['reject_media'] = True
+
+                    if blockdef['reject_reports'] == True:
+                        blockdata['reject_reports'] = True
+
+                elif mergeplan in ['min']:
+                    # Use the lowest block level found
+                    if blockdef['severity'] == 'silence':
+                        blockdata['severity'] = 'silence'
+
+                    if blockdef['reject_media'] == False:
+                        blockdata['reject_media'] = False
+
+                    if blockdef['reject_reports'] == False:
+                        blockdata['reject_reports'] = False
+
+                else:
+                    raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")
+
+            else:
+                # New block
+                blockdata = {
+                    'domain': blockdef['domain'],
+                    # Default to Silence if nothing is specified
+                    'severity': blockdef.get('severity', 'silence'),
+                    'public_comment': blockdef.get('public_comment', ''),
+                    'private_comment': blockdef.get('private_comment', ''),
+                    'reject_media': blockdef.get('reject_media', False),
+                    'reject_reports': blockdef.get('reject_reports', False),
+                    'obfuscate': blockdef.get('obfuscate', False),
+                }
+                merged[domain] = blockdata
+
+    return merged
+
+def fetch_instance_blocklist(token: str, host: str) -> list:
+    """Fetch existing block list from server
+
+    @param token: Bearer token with the admin:read:domain_blocks scope.
+    @param host: The instance host, FQDN or IP.
+    @returns: A list of domain block definition dicts.
+    @raises ValueError: If the API returns a non-200 response.
+    """
+    api_path = "/api/v1/admin/domain_blocks"
+
+    url = f"https://{host}{api_path}"
+
+    domain_blocks = []
+    link = True
+
+    while link:
+        response = requests.get(url, headers={'Authorization': f"Bearer {token}"})
+        if response.status_code != 200:
+            log.error(f"Cannot fetch remote blocklist: {response.content}")
+            raise ValueError(f"Unable to fetch domain block list: {response}")
+        domain_blocks.extend(json.loads(response.content))
+
+        # Parse the link header to find the next url to fetch
+        # This is a weird and janky way of doing pagination but
+        # hey nothing we can do about it we just have to deal
+        # A single-page response has no Link header at all, so use
+        # .get() rather than indexing, which raised KeyError.
+        link = response.headers.get('Link', None)
+        if link is None:
+            break
+        pagination = link.split(', ')
+        if len(pagination) != 2:
+            # No 'next' page left, we're done.
+            link = None
+            break
+        else:
+            # First entry is the 'next' page; 'prev' is ignored.
+            # (Renamed from `next`, which shadowed the builtin.)
+            nextpage = pagination[0]
+
+            urlstring, rel = nextpage.split('; ')
+            url = urlstring.strip('<').rstrip('>')
+
+    log.debug(f"Found {len(domain_blocks)} existing domain blocks.")
+    return domain_blocks
+
+def export_blocklist(token: str, host: str, outfile: str):
+    """Export current server blocklist to a csv file
+
+    @param token: Bearer token with the admin:read:domain_blocks scope.
+    @param host: The instance host, FQDN or IP.
+    @param outfile: Path of the CSV file to write.
+    """
+    blocklist = fetch_instance_blocklist(token, host)
+    fieldnames = ['id', 'domain', 'severity', 'reject_media', 'reject_reports', 'private_comment', 'public_comment', 'obfuscate']
+
+    # Sort by numeric id so the export is stable and in creation order.
+    blocklist = sorted(blocklist, key=lambda x: int(x['id']))
+
+    with open(outfile, "w") as fp:
+        writer = csv.DictWriter(fp, fieldnames, extrasaction='ignore')
+        writer.writeheader()
+        writer.writerows(blocklist)
+
+def delete_blocklist(token: str, host: str, blockfile: str):
+    """Delete domain blocks listed in blockfile
+
+    @param blockfile: A CSV file with 'domain' and 'id' columns, as
+        produced by export_blocklist().
+    """
+    with open(blockfile) as fp:
+        reader = csv.DictReader(fp)
+        for row in reader:
+            domain = row['domain']
+            block_id = row['id']
+            log.debug(f"Deleting {domain} (id: {block_id}) from blocklist...")
+            delete_block(token, host, block_id)
+
+def delete_block(token: str, host: str, id: int):
+    """Remove a domain block
+
+    @param id: The instance's id for the domain block to remove.
+    @raises ValueError: On any API failure other than 404 (already gone).
+    """
+    log.debug(f"Removing domain block {id} at {host}...")
+    api_path = "/api/v1/admin/domain_blocks/"
+
+    url = f"https://{host}{api_path}{id}"
+
+    response = requests.delete(url,
+        headers={'Authorization': f"Bearer {token}"}
+    )
+    if response.status_code != 200:
+        if response.status_code == 404:
+            # Already gone: treat as success, just note it.
+            log.warning(f"No such domain block: {id}")
+            return
+
+        raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")
+
+def update_known_block(token: str, host: str, blockdict: dict):
+    """Update an existing domain block with information in blockdict
+
+    @param blockdict: A block definition dict; must contain the 'id' of
+        the existing block on the destination instance.
+    @raises ValueError: If the API returns a non-200 response.
+    """
+    api_path = "/api/v1/admin/domain_blocks/"
+
+    id = blockdict['id']
+    blockdata = blockdict.copy()
+    del blockdata['id']
+
+    url = f"https://{host}{api_path}{id}"
+
+    # Send JSON rather than form data so booleans arrive as real JSON
+    # booleans; form-encoding sends the strings 'True'/'False', and a
+    # non-empty string is truthy server-side.
+    response = requests.put(url,
+        headers={'Authorization': f"Bearer {token}"},
+        json=blockdata
+    )
+    if response.status_code != 200:
+        raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")
+
+def add_block(token: str, host: str, blockdata: dict):
+    """Block a domain on Mastodon host
+
+    @param blockdata: A complete block definition dict including 'domain'.
+    @raises ValueError: If the API returns a non-200 response.
+    """
+    log.debug(f"Blocking domain {blockdata['domain']} at {host}...")
+    api_path = "/api/v1/admin/domain_blocks"
+
+    url = f"https://{host}{api_path}"
+
+    # JSON body for the same boolean-encoding reason as update_known_block().
+    response = requests.post(url,
+        headers={'Authorization': f"Bearer {token}"},
+        json=blockdata
+    )
+    if response.status_code != 200:
+        raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")
+
+def push_blocklist(token: str, host: str, blocklist: list[dict]):
+    """Push a blocklist to a remote instance.
+
+    Merging the blocklist with the existing list the instance has,
+    updating existing entries if they exist.
+
+    @param token: The Bearer token for OAUTH API authentication
+    @param host: The instance host, FQDN or IP
+    @param blocklist: A list of block definitions. They must include the domain.
+    """
+    # Fetch the existing blocklist from the instance
+    serverblocks = fetch_instance_blocklist(token, host)
+
+    # Convert serverblocks to a dictionary keyed by domain name
+    knownblocks = {row['domain']: row for row in serverblocks}
+
+    for row in blocklist:
+        # Work on a copy so we don't mutate the caller's data, and drop
+        # any 'id' from the source instance: it's meaningless here.
+        row = dict(row)
+        row.pop('id', None)
+
+        try:
+            blockdict = knownblocks[row['domain']]
+            log.info(f"Block already exists for {row['domain']}, merging data...")
+
+            # Check if anything is actually different and needs updating.
+            # BUGFIX: this used to compare against knownblocks[key], but
+            # knownblocks is keyed by *domain*, so every comparison raised
+            # KeyError and the loop silently aborted — changes were never
+            # detected. Compare the instance's block against the new row.
+            change_needed = False
+            for key in [
+                'severity',
+                'public_comment',
+                'private_comment',
+                'reject_media',
+                'reject_reports',
+                'obfuscate',
+                ]:
+                if key in row and blockdict.get(key) != row[key]:
+                    change_needed = True
+                    break
+
+            if change_needed:
+                log.debug(f"Change detected. Updating domain block for {row['domain']}")
+                blockdict.update(row)
+                update_known_block(token, host, blockdict)
+                # add a pause here so we don't melt the instance
+                time.sleep(1)
+
+            else:
+                log.info("No differences detected. Not updating.")
+
+        except KeyError:
+            # domain doesn't have an entry, so we need to create one
+            blockdata = {
+                'domain': row['domain'],
+                # Default to Silence if nothing is specified
+                'severity': row.get('severity', 'silence'),
+                'public_comment': row.get('public_comment', ''),
+                'private_comment': row.get('private_comment', ''),
+                'reject_media': row.get('reject_media', False),
+                'reject_reports': row.get('reject_reports', False),
+                'obfuscate': row.get('obfuscate', False),
+            }
+            log.info(f"Adding new block for {blockdata['domain']}...")
+            add_block(token, host, blockdata)
+            # add a pause here so we don't melt the instance
+            time.sleep(1)
+
+def load_config(configfile: str):
+    """Augment commandline arguments with config file parameters
+
+    Config file is expected to be in TOML format
+
+    @param configfile: Path to the TOML config file.
+    @returns: The parsed configuration dict.
+    """
+    conf = toml.load(configfile)
+    return conf
+
+def save_intermediate(blocklist: list, source: str, filedir: str):
+    """Save a blocklist we've downloaded from a remote source
+
+    Save a local copy of the remote blocklist after downloading it.
+
+    TODO: Not yet implemented.
+    """
+
+
+if __name__ == '__main__':
+
+    ap = argparse.ArgumentParser(description="Bulk blocklist tool",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    ap.add_argument('-c', '--config', default='/etc/default/fediblockhole.conf.toml', help="Config file")
+
+    ap.add_argument('--loglevel', choices=['debug', 'info', 'warning', 'error', 'critical'], help="Set log output level.")
+
+    args = ap.parse_args()
+    if args.loglevel is not None:
+        levelname = args.loglevel.upper()
+        log.setLevel(getattr(logging, levelname))
+
+    # Load the configuration file
+    conf = load_config(args.config)
+
+    # Do the work of syncing
+    sync_blocklists(conf)
\ No newline at end of file
diff --git a/etc/sample.fediblockhole.conf.toml b/etc/sample.fediblockhole.conf.toml
new file mode 100644
index 0000000..fff89e0
--- /dev/null
+++ b/etc/sample.fediblockhole.conf.toml
@@ -0,0 +1,20 @@
+# List of instances to read blocklists from,
+# with the Bearer token authorised by the instance
+# NOTE: TOML inline tables must each be on a single line.
+blocklist_instance_sources = [
+    { domain = 'eigenmagic.net', token = '' },
+    { domain = 'jorts.horse', token = '' },
+]
+
+# List of instances to write blocklist to
+blocklist_instance_destinations = [
+    { domain = 'eigenmagic.net', token = '' },
+]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0dce617
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+requests
+toml