support misskey blocks

projectmoon 2023-11-30 12:03:51 +01:00
parent 4d12bac5a4
commit 28004d3c0b
1 changed file with 246 additions and 31 deletions


@@ -5,6 +5,7 @@ import argparse
import toml
import csv
import requests
import re
import json
import time
import os.path
@@ -148,7 +149,7 @@ def fetch_from_urls(url_sources: dict,
        blocklists.append(bl)
        if save_intermediate:
            save_intermediate_blocklist(bl, savedir, export_fields)
    return blocklists

def fetch_from_instances(sources: dict,
@@ -245,7 +246,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
        if save_block_audit_file:
            blockdata:BlockAudit = {
                'domain': domain,
                'count': domain_matches_count,
                'percent': domain_matches_percent,
            }
            audit.blocks[domain] = blockdata
@@ -258,7 +259,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str='max') -> dict:
    """Use a mergeplan to decide how to merge two overlapping block definitions

    @param oldblock: The existing block definition.
    @param newblock: The new block definition we want to merge in.
    @param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
@@ -276,7 +277,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
        except KeyError:
            log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
            continue

    # How do we override an earlier block definition?
    if mergeplan in ['max', None]:
        # Use the highest block level found (the default)
@@ -285,7 +286,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
        if newblock.severity > oldblock.severity:
            # log.debug(f"New block severity is higher. Using that.")
            blockdata['severity'] = newblock.severity

        # For 'reject_media', 'reject_reports', and 'obfuscate' if
        # the value is set and is True for the domain in
        # any blocklist then the value is set to True.
@@ -345,7 +346,7 @@ def merge_comments(oldcomment:str, newcomment:str) -> str:
    # "boring, lack of moderation, nazis, scrapers"
    old_tokens = oldcomment.split(', ')
    new_tokens = newcomment.split(', ')

    # Remove any empty string tokens that we get
    while '' in old_tokens:
        old_tokens.remove('')
@@ -374,7 +375,141 @@ def requests_headers(token: str=None):
    return headers

def get_nodeinfo(server, token: str=None, host_meta_fallback = False):
    headers = requests_headers(token)
    url = f'https://{server}/.well-known/nodeinfo'
    try:
        resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None

    # if well-known nodeinfo isn't found, try to check host-meta for a webfinger URL
    # needed on servers where the display domain is different than the web domain
    if resp.status_code != 200 and not host_meta_fallback:
        # not found, try to check host-meta as a fallback
        log.warning(f'nodeinfo for {server} not found, checking host-meta')
        new_server = get_server_from_host_meta(server)
        if new_server is not None:
            if new_server == server:
                log.debug(f'host-meta for {server} did not get a new server.')
                return None
            else:
                return get_nodeinfo(new_server, token, host_meta_fallback=True)
        else:
            return None

    if resp.status_code == 200:
        nodeLoc = None
        try:
            nodeInfo = resp.json()
            for link in nodeInfo['links']:
                if link['rel'] in [
                    'http://nodeinfo.diaspora.software/ns/schema/2.0',
                    'http://nodeinfo.diaspora.software/ns/schema/2.1',
                ]:
                    nodeLoc = link['href']
                    break
        except Exception as ex:
            log.error(f'error getting server {server} info from well-known node info. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting well-known host node info for {server}. Status Code: {resp.status_code}')
        return None

    if nodeLoc is None:
        log.warning(f'could not find link to node info in well-known nodeinfo of {server}')
        return None

    # regrab server from nodeLoc, again in the case of different display and web domains
    match = re.match(
        r"https://(?P<server>[^/]+)/", nodeLoc
    )
    if match is None:
        log.error(f"Error getting web server name from {server}.")
        return None

    server = match.group('server')

    try:
        resp = requests.get(nodeLoc, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None

    if resp.status_code == 200:
        try:
            nodeInfo = resp.json()
            if 'activitypub' not in nodeInfo['protocols']:
                log.warning(f'server {server} does not support activitypub, skipping')
                return None
            return {
                'webserver': server,
                'software': nodeInfo['software']['name'],
                'version': nodeInfo['software']['version'],
                'rawnodeinfo': nodeInfo,
            }
        except Exception as ex:
            log.error(f'error getting server {server} info from nodeinfo. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting host node info for {server}. Status Code: {resp.status_code}')
        return None

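The discovery above boils down to two GET requests: read /.well-known/nodeinfo, follow the 2.0 or 2.1 schema link, then read the linked document, with an optional host-meta retry when the display domain differs from the web domain. A condensed standalone sketch of that flow (the helper name and hostname are illustrative and not part of this commit; error handling and the host-meta fallback are omitted):

import re
import requests

def sketch_discover_nodeinfo(server, timeout=30):
    # Step 1: the well-known document lists links to the concrete nodeinfo schemas.
    wellknown = requests.get(f'https://{server}/.well-known/nodeinfo', timeout=timeout).json()
    schemas = (
        'http://nodeinfo.diaspora.software/ns/schema/2.0',
        'http://nodeinfo.diaspora.software/ns/schema/2.1',
    )
    href = next((link['href'] for link in wellknown.get('links', []) if link.get('rel') in schemas), None)
    if href is None:
        return None
    # Step 2: the linked document may live on a different web domain than the display domain.
    webserver = re.match(r'https://(?P<server>[^/]+)/', href).group('server')
    nodeinfo = requests.get(href, timeout=timeout).json()
    return {'webserver': webserver, 'software': nodeinfo['software']['name'], 'rawnodeinfo': nodeinfo}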
def set_server_apis(server):
    # support for new server software should be added here
    software_apis = {
        'mastodonApiSupport': ['mastodon', 'pleroma', 'akkoma', 'pixelfed', 'hometown', 'iceshrimp'],
        'misskeyApiSupport': ['misskey', 'calckey', 'firefish', 'foundkey', 'sharkey'],
        'lemmyApiSupport': ['lemmy']
    }
    for api, softwareList in software_apis.items():
        server[api] = server['software'] in softwareList

    # search `features` list in metadata if available
    if 'metadata' in server['rawnodeinfo'] and 'features' in server['rawnodeinfo']['metadata'] and type(server['rawnodeinfo']['metadata']['features']) is list:
        features = server['rawnodeinfo']['metadata']['features']
        if 'mastodon_api' in features:
            server['mastodonApiSupport'] = True

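set_server_apis reduces API detection to a membership test of nodeinfo's software.name against the hard-coded lists above, with one override: an instance that advertises mastodon_api in metadata.features is treated as Mastodon-compatible regardless of software. A made-up nodeinfo fragment showing the resulting flags (illustrative data only, not from a real instance):

example_server = {
    'webserver': 'example.tld',
    'software': 'firefish',
    'version': '1.0.0',
    'rawnodeinfo': {'protocols': ['activitypub'], 'metadata': {'features': []}},
}
set_server_apis(example_server)
# example_server['misskeyApiSupport']  -> True  ('firefish' is in the Misskey list)
# example_server['mastodonApiSupport'] -> False (not in the Mastodon list, no 'mastodon_api' feature)
# example_server['lemmyApiSupport']    -> False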
def fetch_server_info(server, token: str=None):
    nodeinfo = get_nodeinfo(server, token=token)
    if nodeinfo is None:
        return None
    else:
        set_server_apis(nodeinfo)
        return nodeinfo

def fetch_api_type(host: str, token: str=None) -> str:
    server_info = fetch_server_info(host, token)

    # Many Misskey servers also support the Mastodon API to varying
    # extents, so we check for Misskey API support before checking
    # Mastodon API support.
    if server_info['misskeyApiSupport']:
        return "misskey"
    elif server_info['mastodonApiSupport']:
        return "mastodon"
    else:
        sys.exit(f"Unknown API type for {host}")

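Because several Misskey-family servers also expose a partial Mastodon API, the Misskey flag is checked first; otherwise a Firefish or Sharkey instance could be driven through the wrong endpoints. Hypothetical usage (the hostname and token are placeholders, not from this commit):

api_type = fetch_api_type('fedi.example.org')   # e.g. 'misskey' for a Firefish or Sharkey instance
blocklist = fetch_instance_blocklist('fedi.example.org', token='EXAMPLE_ADMIN_TOKEN', admin=True)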
def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
        import_fields: list=['domain', 'severity'],
        scheme: str='https') -> list[DomainBlock]:
    """
    Fetch the existing block list from the server (Misskey or Mastodon)
    """
    api_type = fetch_api_type(host)

    if api_type == 'mastodon':
        return fetch_instance_blocklist_mastodon(host, token, admin, import_fields, scheme)
    elif api_type == 'misskey':
        return fetch_instance_blocklist_misskey(host, token, admin, import_fields, scheme)
    else:
        sys.exit(f"Unknown API type for server {host}")

def fetch_instance_blocklist_mastodon(host: str, token: str=None, admin: bool=False,
        import_fields: list=['domain', 'severity'],
        scheme: str='https') -> list[DomainBlock]:
    """Fetch existing block list from server
@@ -385,7 +520,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
    @param import_fields: A list of fields to import from the remote instance.
    @returns: A list of the domain blocks from the instance.
    """
    log.info(f"Fetching instance blocklist (Mastodon) from {host} ...")

    if admin:
        api_path = "/api/v1/admin/domain_blocks"
@@ -424,7 +559,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
        else:
            next = pagination[0]
            # prev = pagination[1]

        urlstring, rel = next.split('; ')
        url = urlstring.strip('<').rstrip('>')
@@ -432,6 +567,54 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
    return blocklist

def fetch_instance_blocklist_misskey(host: str, token: str=None, admin: bool=False,
        import_fields: list=['domain', 'severity'],
        scheme: str='https') -> list[DomainBlock]:
    """Fetch existing block list from server

    @param host: The remote host to connect to.
    @param token: The (required) OAuth Bearer token to authenticate with.
    @param admin: Boolean flag to use the admin API if True.
    @param import_fields: A list of fields to import from the remote instance.
    @returns: A list of the domain blocks from the instance.
    """
    log.info(f"Fetching instance blocklist (Misskey) from {host} ...")

    if admin:
        api_path = "/api/admin/meta"
        parse_format = 'json'
    else:
        sys.exit("Must be an admin to use the meta API")

    headers = requests_headers(token)

    url = f"{scheme}://{host}{api_path}"
    blockdata = []

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json={})
    if response.status_code != 200:
        log.error(f"{response.status_code} Cannot fetch remote blocklist: {response.content}")
        raise ValueError("Unable to fetch domain block list: %s", response)

    # Convert the Misskey blocks (which are just domains) into
    # something similar to the Mastodon response so that Fediblockhole
    # can understand them.
    def map_block(domain, severity):
        return { "domain": domain, "digest": "", "severity": severity, "comment": "" }

    meta = json.loads(response.content.decode('utf-8'))
    blocked_hosts = [ map_block(domain, "suspend") for domain in meta['blockedHosts'] ]
    silenced_hosts = [ map_block(domain, "silence") for domain in meta['silencedHosts'] ]

    blockdata.extend(blocked_hosts)
    blockdata.extend(silenced_hosts)

    blocklist = parse_blocklist(blockdata, url, parse_format, import_fields)

    return blocklist

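Misskey's admin meta endpoint returns the block and silence lists as bare domain arrays (blockedHosts and silencedHosts), so each domain is wrapped in a Mastodon-shaped record before being handed to parse_blocklist. A made-up response fragment and the rows it would produce (this mirrors what the nested map_block helper does; the domains are placeholders):

meta = {'blockedHosts': ['spam.example'], 'silencedHosts': ['noisy.example']}
rows = ([{'domain': d, 'digest': '', 'severity': 'suspend', 'comment': ''} for d in meta['blockedHosts']]
        + [{'domain': d, 'digest': '', 'severity': 'silence', 'comment': ''} for d in meta['silencedHosts']])
# rows[0] == {'domain': 'spam.example', 'digest': '', 'severity': 'suspend', 'comment': ''}
# rows[1] == {'domain': 'noisy.example', 'digest': '', 'severity': 'silence', 'comment': ''}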
def delete_block(token: str, host: str, id: int, scheme: str='https'):
    """Remove a domain block"""
    log.debug(f"Removing domain block {id} at {host}...")
@@ -552,9 +735,9 @@ def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str='https'
        log.warning(err['error'])

    elif response.status_code != 200:
        raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")

def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
        dryrun: bool=False,
        import_fields: list=['domain', 'severity'],
@@ -563,7 +746,7 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
        override_private_comment: str=None
        ):
    """Push a blocklist to a remote instance.

    Updates existing entries if they exist, creates new blocks if they don't.

    @param token: The Bearer token for OAUTH API authentication
@@ -579,11 +762,10 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
    serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme)

    # # Convert serverblocks to a dictionary keyed by domain name
    # knownblocks = {row.domain: row for row in serverblocks}

    for newblock in blocklist.values():

        log.debug(f"Processing block: {newblock}")
        # Already existing block
        if newblock.domain in serverblocks:
            log.debug(f"Block already exists for {newblock.domain}, checking for differences...")
@@ -612,19 +794,12 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
                blockdata = oldblock.copy()
                blockdata.update(newblock)
                log.debug(f"Block as dict: {blockdata._asdict()}")

                if not dryrun:
                    update_known_block(token, host, blockdata, scheme)
                    # add a pause here so we don't melt the instance
                    time.sleep(API_CALL_DELAY)
                else:
                    log.info("Dry run selected. Not applying changes.")

            else:
                log.debug("No differences detected. Not updating.")
                pass

        else:
            # New block
            # stamp this record with a private comment, since we're the ones adding it
            if override_private_comment:
                newblock.private_comment = override_private_comment
@@ -636,16 +811,56 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
            # Make sure the new block doesn't clobber a domain with followers
            newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity, scheme)

            if not dryrun:
                add_block(token, host, newblock, scheme)
                # add a pause here so we don't melt the instance
                time.sleep(API_CALL_DELAY)
            else:
                log.info("Dry run selected. Not adding block.")

    # Append existing blocks to the new blocks (needed because Misskey
    # is an all-at-once update operation)
    for existing_block in serverblocks.values():
        if existing_block.domain not in blocklist:
            blocklist.blocks[existing_block.domain] = existing_block

    apply_blocks_misskey(blocklist, host, scheme, token)

def apply_blocks_misskey(blocklist: Blocklist, host:str, scheme: str="https", token: str=None):
    # instead of being cool and trying to update everything, split
    # into lists of suspends and silences, then call update-meta with
    # these new lists.
    suspended_hosts = []
    silenced_hosts = []

    suspend_level = BlockSeverity("suspend")
    silence_level = BlockSeverity("silence")

    for block in blocklist.values():
        if block.severity == suspend_level:
            suspended_hosts.append(block.domain)
        elif block.severity == silence_level:
            silenced_hosts.append(block.domain)

    # Make one big call to update-meta with the new suspend and silence lists
    api_path = "/api/admin/update-meta"
    parse_format = 'json'

    headers = requests_headers(token)

    url = f"{scheme}://{host}{api_path}"

    update_data = {
        "blockedHosts": suspended_hosts,
        "silencedHosts": silenced_hosts
    }

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json=update_data)
    if response.status_code != 200 and response.status_code != 204:
        log.error(f"{response.status_code} Unable to apply blocklist: {response.content}")
        raise ValueError("Unable to apply block list: %s", response)

    log.info(f"Updated meta with {len(suspended_hosts)} blocks and {len(silenced_hosts)} silences")
    return True

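Unlike the Mastodon path, which POSTs or PUTs one domain block at a time, Misskey's admin/update-meta call replaces both lists wholesale; this is why push_blocklist folds the server's existing blocks back into the merged blocklist before calling this function. The request reduces to a single JSON body of two arrays (a sketch; the domains are placeholders, not from this commit):

update_data = {
    'blockedHosts': ['spam.example', 'worse.example'],   # every block with severity 'suspend'
    'silencedHosts': ['noisy.example'],                  # every block with severity 'silence'
}
# requests.post(f'https://{host}/api/admin/update-meta',
#               headers=requests_headers(token),
#               json=update_data, timeout=REQUEST_TIMEOUT)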
def load_config(configfile: str):
    """Augment commandline arguments with config file parameters

    Config file is expected to be in TOML format
    """
    conf = toml.load(configfile)
@@ -723,7 +938,7 @@ def save_domain_block_audit_to_file(
def augment_args(args, tomldata: str=None):
    """Augment commandline arguments with config file parameters

    If tomldata is provided, uses that data instead of loading
    from a config file.
    """
@@ -749,7 +964,7 @@ def augment_args(args, tomldata: str=None):
    if not args.override_private_comment:
        args.override_private_comment = conf.get('override_private_comment', None)

    if not args.savedir:
        args.savedir = conf.get('savedir', '/tmp')