support misskey blocks

This commit is contained in:
projectmoon 2023-11-30 12:03:51 +01:00
parent 4d12bac5a4
commit 28004d3c0b
1 changed files with 246 additions and 31 deletions

View File

@ -5,6 +5,7 @@ import argparse
import toml
import csv
import requests
import re
import json
import time
import os.path
@ -148,7 +149,7 @@ def fetch_from_urls(url_sources: dict,
blocklists.append(bl)
if save_intermediate:
save_intermediate_blocklist(bl, savedir, export_fields)
return blocklists
def fetch_from_instances(sources: dict,
@ -245,7 +246,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
if save_block_audit_file:
blockdata:BlockAudit = {
'domain': domain,
'count': domain_matches_count,
'count': domain_matches_count,
'percent': domain_matches_percent,
}
audit.blocks[domain] = blockdata
@ -258,7 +259,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str='max') -> dict:
"""Use a mergeplan to decide how to merge two overlapping block definitions
@param oldblock: The existing block definition.
@param newblock: The new block definition we want to merge in.
@param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
@ -276,7 +277,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
except KeyError:
log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
continue
# How do we override an earlier block definition?
if mergeplan in ['max', None]:
# Use the highest block level found (the default)
@ -285,7 +286,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
if newblock.severity > oldblock.severity:
# log.debug(f"New block severity is higher. Using that.")
blockdata['severity'] = newblock.severity
# For 'reject_media', 'reject_reports', and 'obfuscate' if
# the value is set and is True for the domain in
# any blocklist then the value is set to True.
@ -345,7 +346,7 @@ def merge_comments(oldcomment:str, newcomment:str) -> str:
# "boring, lack of moderation, nazis, scrapers"
old_tokens = oldcomment.split(', ')
new_tokens = newcomment.split(', ')
# Remove any empty string tokens that we get
while '' in old_tokens:
old_tokens.remove('')
@ -374,7 +375,141 @@ def requests_headers(token: str=None):
return headers
def get_nodeinfo(server, token: str=None, host_meta_fallback = False):
    """Fetch the nodeinfo document for a server and summarise it.

    Requests ``/.well-known/nodeinfo`` from the server, follows the link to a
    nodeinfo schema 2.0 or 2.1 document, and extracts the software name and
    version from it. If the well-known document cannot be fetched, host-meta
    is consulted once (via get_server_from_host_meta) to find a different web
    domain, and the lookup is retried against that domain.

    @param server: The domain name of the server to query.
    @param token: Optional token handed to requests_headers() to build the request headers.
    @param host_meta_fallback: True when this call is already the host-meta
        retry, so that no further fallback is attempted.
    @returns: A dict with the keys 'webserver', 'software', 'version' and
        'rawnodeinfo', or None if the information could not be retrieved or
        the server does not list 'activitypub' in its protocols.
    """
    headers = requests_headers(token)
    url = f'https://{server}/.well-known/nodeinfo'
    try:
        resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None

    # if well-known nodeinfo isn't found, try to check host-meta for a webfinger URL
    # needed on servers where the display domain is different than the web domain
    if resp.status_code != 200 and not host_meta_fallback:
        # not found, try to check host-meta as a fallback
        log.warning(f'nodeinfo for {server} not found, checking host-meta')
        new_server = get_server_from_host_meta(server)
        if new_server is None:
            return None
        if new_server == server:
            log.debug(f'host-meta for {server} did not get a new server.')
            return None
        # Pass the flag by keyword: positionally, True would be bound to
        # `token` and the fallback guard would never be set.
        return get_nodeinfo(new_server, token=token, host_meta_fallback=True)

    if resp.status_code == 200:
        nodeLoc = None
        try:
            nodeInfo = resp.json()
            for link in nodeInfo['links']:
                if link['rel'] in [
                    'http://nodeinfo.diaspora.software/ns/schema/2.0',
                    'http://nodeinfo.diaspora.software/ns/schema/2.1',
                ]:
                    nodeLoc = link['href']
                    break
        except Exception as ex:
            log.error(f'error getting server {server} info from well-known node info. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting well-known host node info for {server}. Status Code: {resp.status_code}')
        return None

    if nodeLoc is None:
        log.warning(f'could not find link to node info in well-known nodeinfo of {server}')
        return None

    # regrab server from nodeLoc, again in the case of different display and web domains
    match = re.match(
        r"https://(?P<server>[^/]+)/", nodeLoc
    )
    if match is None:
        log.error(f"Error getting web server name from {server}.")
        return None
    server = match.group('server')

    try:
        resp = requests.get(nodeLoc, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None

    if resp.status_code == 200:
        try:
            nodeInfo = resp.json()
            if 'activitypub' not in nodeInfo['protocols']:
                log.warning(f'server {server} does not support activitypub, skipping')
                return None
            return {
                'webserver': server,
                'software': nodeInfo['software']['name'],
                'version': nodeInfo['software']['version'],
                'rawnodeinfo': nodeInfo,
            }
        except Exception as ex:
            log.error(f'error getting server {server} info from nodeinfo. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting host node info for {server}. Status Code: {resp.status_code}')
        return None
def set_server_apis(server):
    """Annotate a server info dict with the API families it supports.

    Sets the boolean keys 'mastodonApiSupport', 'misskeyApiSupport' and
    'lemmyApiSupport' on the dict, in place, based on the software name.
    'mastodonApiSupport' is additionally forced to True when the raw nodeinfo
    metadata advertises 'mastodon_api' in its `features` list.

    @param server: A dict with at least the keys 'software' (software name)
        and 'rawnodeinfo' (the parsed nodeinfo document). Modified in place.
    @returns: None
    """
    # support for new server software should be added here
    software_apis = {
        'mastodonApiSupport': ['mastodon', 'pleroma', 'akkoma', 'pixelfed', 'hometown', 'iceshrimp'],
        'misskeyApiSupport': ['misskey', 'calckey', 'firefish', 'foundkey', 'sharkey'],
        'lemmyApiSupport': ['lemmy']
    }

    for api, softwareList in software_apis.items():
        server[api] = server['software'] in softwareList

    # search `features` list in metadata if available
    # Type-check each level so a null or otherwise malformed `metadata`
    # value is ignored instead of raising TypeError.
    metadata = server['rawnodeinfo'].get('metadata')
    features = metadata.get('features') if isinstance(metadata, dict) else None
    if isinstance(features, list) and 'mastodon_api' in features:
        server['mastodonApiSupport'] = True
def fetch_server_info(server, token: str=None):
    """Look up a server's nodeinfo and tag it with its supported APIs.

    @param server: The domain name of the server to query.
    @param token: Optional token passed through to get_nodeinfo().
    @returns: The dict produced by get_nodeinfo() after set_server_apis()
        has been applied to it, or None if get_nodeinfo() returned None.
    """
    info = get_nodeinfo(server, token=token)
    if info is not None:
        # Only annotate when there is actually something to annotate
        set_server_apis(info)
    return info
def fetch_api_type(host: str, token: str=None) -> str:
    """Determine which API family to use when talking to a host.

    @param host: The remote host to inspect.
    @param token: Optional token passed through to fetch_server_info().
    @returns: "misskey" or "mastodon".

    Exits the program via sys.exit() if the server info cannot be fetched or
    the host supports neither API.
    """
    server_info = fetch_server_info(host, token)

    # fetch_server_info() returns None when nodeinfo could not be retrieved;
    # stop with a clear message rather than failing on the subscript below.
    if server_info is None:
        sys.exit(f"Unable to fetch server info for {host}, cannot determine API type")

    # Many Misskey servers also support the Mastodon API to varying
    # extents, so we should check Misskey API support before checking
    # Mastodon API support.
    if server_info['misskeyApiSupport']:
        return "misskey"
    if server_info['mastodonApiSupport']:
        return "mastodon"
    sys.exit(f"Unknown API type for {host}")
def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
                             import_fields: list=['domain', 'severity'],
                             scheme: str='https') -> list[DomainBlock]:
    """Fetch the existing block list from a server (Misskey or Mastodon).

    Detects the host's API type and delegates to the matching fetcher.

    @param host: The remote host to connect to.
    @param token: The token handed on to the API-specific fetcher.
    @param admin: Boolean flag handed on to the API-specific fetcher.
    @param import_fields: A list of fields to import from the remote instance.
    @param scheme: URL scheme handed on to the API-specific fetcher.
    @returns: Whatever the API-specific fetcher returns.
    """
    api_type = fetch_api_type(host)

    # Guard clause first: anything other than the two known API types is fatal
    if api_type not in ('mastodon', 'misskey'):
        sys.exit(f"Unknown API type for server {host}")

    if api_type == 'mastodon':
        fetcher = fetch_instance_blocklist_mastodon
    else:
        fetcher = fetch_instance_blocklist_misskey
    return fetcher(host, token, admin, import_fields, scheme)
def fetch_instance_blocklist_mastodon(host: str, token: str=None, admin: bool=False,
import_fields: list=['domain', 'severity'],
scheme: str='https') -> list[DomainBlock]:
"""Fetch existing block list from server
@ -385,7 +520,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
@param import_fields: A list of fields to import from the remote instance.
@returns: A list of the domain blocks from the instance.
"""
log.info(f"Fetching instance blocklist from {host} ...")
log.info(f"Fetching instance blocklist (Mastodon) from {host} ...")
if admin:
api_path = "/api/v1/admin/domain_blocks"
@ -424,7 +559,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
else:
next = pagination[0]
# prev = pagination[1]
urlstring, rel = next.split('; ')
url = urlstring.strip('<').rstrip('>')
@ -432,6 +567,54 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
return blocklist
def fetch_instance_blocklist_misskey(host: str, token: str=None, admin: bool=False,
                                     import_fields: list=['domain', 'severity'],
                                     scheme: str='https') -> list[DomainBlock]:
    """Fetch existing block list from a server using the Misskey API

    Reads the instance meta via a POST to /api/admin/meta and converts its
    'blockedHosts' (as severity "suspend") and 'silencedHosts' (as severity
    "silence") into Mastodon-style block entries before parsing them.

    @param host: The remote host to connect to.
    @param token: The (required) OAuth Bearer token to authenticate with.
    @param admin: Boolean flag to use the admin API if True. The program
        exits if this is False, as only the admin meta API is used.
    @param import_fields: A list of fields to import from the remote instance.
    @param scheme: The URL scheme to use for the request.
    @returns: The result of parse_blocklist() for the converted entries.
    @raises ValueError: If the server does not answer with HTTP 200.
    """
    log.info(f"Fetching instance blocklist (Misskey) from {host} ...")

    if not admin:
        sys.exit("Must be an admin to use the meta API")
    api_path = "/api/admin/meta"
    parse_format = 'json'

    headers = requests_headers(token)

    url = f"{scheme}://{host}{api_path}"

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json={})
    if response.status_code != 200:
        log.error(f"{response.status_code} Cannot fetch remote blocklist: {response.content}")
        # Exceptions do not apply %-formatting to their arguments, so build
        # the message explicitly.
        raise ValueError(f"Unable to fetch domain block list: {response}")

    # Convert the Misskey block (which are just domains) into
    # something similar to Mastodon response that Fediblockhole
    # can understand.
    def map_block(domain, severity):
        return { "domain": domain, "digest": "", "severity": severity, "comment": "" }

    meta = json.loads(response.content.decode('utf-8'))

    # Treat a missing or null list as empty rather than failing with a
    # KeyError/TypeError. NOTE(review): presumably some Misskey versions or
    # forks omit 'silencedHosts' -- confirm against the servers in use.
    blockdata = [ map_block(domain, "suspend") for domain in meta.get('blockedHosts') or [] ]
    blockdata.extend(map_block(domain, "silence") for domain in meta.get('silencedHosts') or [])

    blocklist = parse_blocklist(blockdata, url, parse_format, import_fields)

    return blocklist
def delete_block(token: str, host: str, id: int, scheme: str='https'):
"""Remove a domain block"""
log.debug(f"Removing domain block {id} at {host}...")
@ -552,9 +735,9 @@ def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str='https'
log.warning(err['error'])
elif response.status_code != 200:
raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")
def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
dryrun: bool=False,
import_fields: list=['domain', 'severity'],
@ -563,7 +746,7 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
override_private_comment: str=None
):
"""Push a blocklist to a remote instance.
Updates existing entries if they exist, creates new blocks if they don't.
@param token: The Bearer token for OAUTH API authentication
@ -579,11 +762,10 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme)
# # Convert serverblocks to a dictionary keyed by domain name
# knownblocks = {row.domain: row for row in serverblocks}
for newblock in blocklist.values():
log.debug(f"Processing block: {newblock}")
# Already existing block
if newblock.domain in serverblocks:
log.debug(f"Block already exists for {newblock.domain}, checking for differences...")
@ -612,19 +794,12 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
blockdata = oldblock.copy()
blockdata.update(newblock)
log.debug(f"Block as dict: {blockdata._asdict()}")
if not dryrun:
update_known_block(token, host, blockdata, scheme)
# add a pause here so we don't melt the instance
time.sleep(API_CALL_DELAY)
else:
log.info("Dry run selected. Not applying changes.")
else:
log.debug("No differences detected. Not updating.")
pass
else:
# New block
# stamp this record with a private comment, since we're the ones adding it
if override_private_comment:
newblock.private_comment = override_private_comment
@ -636,16 +811,56 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
# Make sure the new block doesn't clobber a domain with followers
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity, scheme)
if not dryrun:
add_block(token, host, newblock, scheme)
# add a pause here so we don't melt the instance
time.sleep(API_CALL_DELAY)
else:
log.info("Dry run selected. Not adding block.")
# Append existing blocks to the new blocks (needed because Misskey
# is all at once update operation)
for existing_block in serverblocks.values():
if existing_block.domain not in blocklist:
blocklist.blocks[existing_block.domain] = existing_block
apply_blocks_misskey(blocklist, host, scheme, token)
def apply_blocks_misskey(blocklist: Blocklist, host:str, scheme: str="https", token: str=None):
    """Push a complete set of blocks to a server using the Misskey API

    Domains whose severity equals "suspend" are sent as 'blockedHosts' and
    those whose severity equals "silence" as 'silencedHosts', in a single
    POST to /api/admin/update-meta. Blocks with any other severity are not
    sent.

    @param blocklist: The blocklist whose blocks should be applied.
    @param host: The remote host to connect to.
    @param scheme: The URL scheme to use for the request.
    @param token: The token handed to requests_headers() for authentication.
    @returns: True on success.
    @raises ValueError: If the server answers with a status other than 200 or 204.
    """
    # instead of being cool and trying to update everything, split
    # into lists of suspends and silences, then call update meta with
    # these new lists.
    suspended_hosts = []
    silenced_hosts = []

    suspend_level = BlockSeverity("suspend")
    silence_level = BlockSeverity("silence")

    for block in blocklist.values():
        if block.severity == suspend_level:
            suspended_hosts.append(block.domain)
        elif block.severity == silence_level:
            silenced_hosts.append(block.domain)

    # Make one big call to update-meta with new blocks and suspends
    api_path = "/api/admin/update-meta"

    headers = requests_headers(token)
    url = f"{scheme}://{host}{api_path}"

    update_data = {
        "blockedHosts": suspended_hosts,
        "silencedHosts": silenced_hosts
    }

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json=update_data)
    if response.status_code not in (200, 204):
        log.error(f"{response.status_code} Unable to apply blocklist: {response.content}")
        # Exceptions do not apply %-formatting to their arguments, so build
        # the message explicitly.
        raise ValueError(f"Unable to apply block list: {response}")

    log.info(f"Updated meta with {len(suspended_hosts)} blocks and {len(silenced_hosts)} silences")

    return True
def load_config(configfile: str):
"""Augment commandline arguments with config file parameters
Config file is expected to be in TOML format
"""
conf = toml.load(configfile)
@ -723,7 +938,7 @@ def save_domain_block_audit_to_file(
def augment_args(args, tomldata: str=None):
"""Augment commandline arguments with config file parameters
If tomldata is provided, uses that data instead of loading
from a config file.
"""
@ -749,7 +964,7 @@ def augment_args(args, tomldata: str=None):
if not args.override_private_comment:
args.override_private_comment = conf.get('override_private_comment', None)
if not args.savedir:
args.savedir = conf.get('savedir', '/tmp')