support misskey blocks

commit 28004d3c0b (parent 4d12bac5a4)

@@ -5,6 +5,7 @@ import argparse
import toml
import csv
import requests
import re
import json
import time
import os.path
@@ -148,7 +149,7 @@ def fetch_from_urls(url_sources: dict,
        blocklists.append(bl)
        if save_intermediate:
            save_intermediate_blocklist(bl, savedir, export_fields)

    return blocklists

def fetch_from_instances(sources: dict,
@@ -245,7 +246,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
        if save_block_audit_file:
            blockdata: BlockAudit = {
                'domain': domain,
                'count': domain_matches_count,
                'percent': domain_matches_percent,
            }
            audit.blocks[domain] = blockdata
@@ -258,7 +259,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',

def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str='max') -> dict:
    """Use a mergeplan to decide how to merge two overlapping block definitions

    @param oldblock: The existing block definition.
    @param newblock: The new block definition we want to merge in.
    @param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
@@ -276,7 +277,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
        except KeyError:
            log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
            continue

    # How do we override an earlier block definition?
    if mergeplan in ['max', None]:
        # Use the highest block level found (the default)
@@ -285,7 +286,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
        if newblock.severity > oldblock.severity:
            # log.debug(f"New block severity is higher. Using that.")
            blockdata['severity'] = newblock.severity

        # For 'reject_media', 'reject_reports', and 'obfuscate', if the value
        # is set and is True for the domain in any blocklist, then the value
        # is set to True.
@@ -345,7 +346,7 @@ def merge_comments(oldcomment:str, newcomment:str) -> str:
    # "boring, lack of moderation, nazis, scrapers"
    old_tokens = oldcomment.split(', ')
    new_tokens = newcomment.split(', ')

    # Remove any empty string tokens that we get
    while '' in old_tokens:
        old_tokens.remove('')
@@ -374,7 +375,141 @@ def requests_headers(token: str=None):

    return headers

def get_nodeinfo(server, token: str=None, host_meta_fallback: bool=False):
    """Fetch nodeinfo for a server via its /.well-known/nodeinfo endpoint.

    Falls back to host-meta discovery once if the well-known lookup fails.
    Returns a dict with the web server name, software name and version, and
    the raw nodeinfo document, or None on any error.
    """
    headers = requests_headers(token)
    url = f'https://{server}/.well-known/nodeinfo'
    try:
        resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None
    # If well-known nodeinfo isn't found, try to check host-meta for a webfinger URL.
    # This is needed on servers where the display domain differs from the web domain.
    if resp.status_code != 200 and not host_meta_fallback:
        # Not found, so try host-meta as a fallback.
        log.warning(f'nodeinfo for {server} not found, checking host-meta')
        new_server = get_server_from_host_meta(server)
        if new_server is not None:
            if new_server == server:
                log.debug(f'host-meta for {server} did not get a new server.')
                return None
            else:
                return get_nodeinfo(new_server, token, host_meta_fallback=True)
        else:
            return None

    if resp.status_code == 200:
        nodeLoc = None
        try:
            nodeInfo = resp.json()
            for link in nodeInfo['links']:
                if link['rel'] in [
                    'http://nodeinfo.diaspora.software/ns/schema/2.0',
                    'http://nodeinfo.diaspora.software/ns/schema/2.1',
                ]:
                    nodeLoc = link['href']
                    break
        except Exception as ex:
            log.error(f'error getting server {server} info from well-known node info. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting well-known host node info for {server}. Status Code: {resp.status_code}')
        return None

    if nodeLoc is None:
        log.warning(f'could not find link to node info in well-known nodeinfo of {server}')
        return None

    # Re-grab the server name from nodeLoc, again to handle differing display and web domains.
    match = re.match(
        r"https://(?P<server>[^/]+)/", nodeLoc
    )
    if match is None:
        log.error(f"Error getting web server name from {server}.")
        return None

    server = match.group('server')

    try:
        resp = requests.get(nodeLoc, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None

    if resp.status_code == 200:
        try:
            nodeInfo = resp.json()
            if 'activitypub' not in nodeInfo['protocols']:
                log.warning(f'server {server} does not support activitypub, skipping')
                return None
            return {
                'webserver': server,
                'software': nodeInfo['software']['name'],
                'version': nodeInfo['software']['version'],
                'rawnodeinfo': nodeInfo,
            }
        except Exception as ex:
            log.error(f'error getting server {server} info from nodeinfo. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting host node info for {server}. Status Code: {resp.status_code}')
        return None

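# For illustration only (domain and version values here are hypothetical): the
# well-known nodeinfo document parsed above typically looks like
#
#     {"links": [{"rel": "http://nodeinfo.diaspora.software/ns/schema/2.0",
#                 "href": "https://demo.example/nodeinfo/2.0"}]}
#
# and the nodeinfo document at that href carries the fields read by
# get_nodeinfo() and set_server_apis(), e.g.
#
#     {"software": {"name": "misskey", "version": "2023.12.0"},
#      "protocols": ["activitypub"],
#      "metadata": {}}
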
def set_server_apis(server):
    # Support for new server software should be added here.
    software_apis = {
        'mastodonApiSupport': ['mastodon', 'pleroma', 'akkoma', 'pixelfed', 'hometown', 'iceshrimp'],
        'misskeyApiSupport': ['misskey', 'calckey', 'firefish', 'foundkey', 'sharkey'],
        'lemmyApiSupport': ['lemmy'],
    }

    for api, softwareList in software_apis.items():
        server[api] = server['software'] in softwareList

    # Search the `features` list in the nodeinfo metadata, if available.
    if 'metadata' in server['rawnodeinfo'] and 'features' in server['rawnodeinfo']['metadata'] and isinstance(server['rawnodeinfo']['metadata']['features'], list):
        features = server['rawnodeinfo']['metadata']['features']
        if 'mastodon_api' in features:
            server['mastodonApiSupport'] = True

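# As a rough example (hypothetical server): a nodeinfo dict whose software name is
# 'sharkey' leaves set_server_apis() with misskeyApiSupport=True and the other flags
# False, unless the instance also advertises 'mastodon_api' in a `features` list in
# its nodeinfo metadata, in which case mastodonApiSupport is switched on as well.
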
def fetch_server_info(server, token: str=None):
    nodeinfo = get_nodeinfo(server, token=token)
    if nodeinfo is None:
        return None
    else:
        set_server_apis(nodeinfo)
        return nodeinfo

def fetch_api_type(host: str, token: str=None) -> str:
    server_info = fetch_server_info(host, token)
    if server_info is None:
        sys.exit(f"Unable to fetch server info for {host}")

    # Many Misskey servers also support the Mastodon API to varying extents,
    # so check for Misskey API support before checking Mastodon API support.
    if server_info['misskeyApiSupport']:
        return "misskey"
    elif server_info['mastodonApiSupport']:
        return "mastodon"
    else:
        sys.exit(f"Unknown API type for {host}")

def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
                             import_fields: list=['domain', 'severity'],
                             scheme: str='https') -> list[DomainBlock]:
    """
    Fetch existing blocks from a server (Misskey or Mastodon).
    """
    api_type = fetch_api_type(host)

    if api_type == 'mastodon':
        return fetch_instance_blocklist_mastodon(host, token, admin, import_fields, scheme)
    elif api_type == 'misskey':
        return fetch_instance_blocklist_misskey(host, token, admin, import_fields, scheme)
    else:
        sys.exit(f"Unknown API type for server {host}")

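# Usage sketch (hostname and token are placeholders, not real values):
#
#     blocklist = fetch_instance_blocklist('demo.example', token='...', admin=True)
#
# The call is routed to the Mastodon or Misskey fetcher below according to what
# fetch_api_type() reports for the host.
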
def fetch_instance_blocklist_mastodon(host: str, token: str=None, admin: bool=False,
                                      import_fields: list=['domain', 'severity'],
                                      scheme: str='https') -> list[DomainBlock]:
    """Fetch existing block list from server

@@ -385,7 +520,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
    @param import_fields: A list of fields to import from the remote instance.
    @returns: A list of the domain blocks from the instance.
    """
-    log.info(f"Fetching instance blocklist from {host} ...")
+    log.info(f"Fetching instance blocklist (Mastodon) from {host} ...")

    if admin:
        api_path = "/api/v1/admin/domain_blocks"
@@ -424,7 +559,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
        else:
            next = pagination[0]
            # prev = pagination[1]

            urlstring, rel = next.split('; ')
            url = urlstring.strip('<').rstrip('>')
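
# For context (values are illustrative, not from a real server): the pagination
# parsed above comes from Mastodon's Link response header, which looks like
#
#     <https://demo.example/api/v1/admin/domain_blocks?max_id=123>; rel="next"
#
# so splitting on '; ' and trimming the angle brackets yields the next page URL.
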
@@ -432,6 +567,54 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,

    return blocklist

def fetch_instance_blocklist_misskey(host: str, token: str=None, admin: bool=False,
                                     import_fields: list=['domain', 'severity'],
                                     scheme: str='https') -> list[DomainBlock]:
    """Fetch existing block list from server

    @param host: The remote host to connect to.
    @param token: The (required) OAuth Bearer token to authenticate with.
    @param admin: Boolean flag to use the admin API if True.
    @param import_fields: A list of fields to import from the remote instance.
    @returns: A list of the domain blocks from the instance.
    """
    log.info(f"Fetching instance blocklist (Misskey) from {host} ...")

    if admin:
        api_path = "/api/admin/meta"
        parse_format = 'json'
    else:
        sys.exit("Must be an admin to use the meta API")

    headers = requests_headers(token)

    url = f"{scheme}://{host}{api_path}"

    blockdata = []

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json={})
    if response.status_code != 200:
        log.error(f"{response.status_code} Cannot fetch remote blocklist: {response.content}")
        raise ValueError(f"Unable to fetch domain block list: {response}")

    # Convert the Misskey blocks (which are just domain names) into something
    # similar to a Mastodon response that Fediblockhole can understand.
    def map_block(domain, severity):
        return { "domain": domain, "digest": "", "severity": severity, "comment": "" }

    meta = json.loads(response.content.decode('utf-8'))

    blocked_hosts = [ map_block(domain, "suspend") for domain in meta['blockedHosts'] ]
    silenced_hosts = [ map_block(domain, "silence") for domain in meta['silencedHosts'] ]

    blockdata.extend(blocked_hosts)
    blockdata.extend(silenced_hosts)

    blocklist = parse_blocklist(blockdata, url, parse_format, import_fields)

    return blocklist

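# A small sketch of the mapping above (domains are made up): a Misskey meta
# response containing
#
#     {"blockedHosts": ["spam.example"], "silencedHosts": ["noisy.example"], ...}
#
# is flattened by map_block() into Mastodon-style rows such as
#
#     {"domain": "spam.example", "digest": "", "severity": "suspend", "comment": ""}
#     {"domain": "noisy.example", "digest": "", "severity": "silence", "comment": ""}
#
# before being handed to parse_blocklist().
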
def delete_block(token: str, host: str, id: int, scheme: str='https'):
    """Remove a domain block"""
    log.debug(f"Removing domain block {id} at {host}...")

@@ -552,9 +735,9 @@ def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str='https'
        log.warning(err['error'])

    elif response.status_code != 200:
        raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")


def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
                   dryrun: bool=False,
                   import_fields: list=['domain', 'severity'],
@@ -563,7 +746,7 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
                   override_private_comment: str=None
                   ):
    """Push a blocklist to a remote instance.

    Updates existing entries if they exist, creates new blocks if they don't.

    @param token: The Bearer token for OAUTH API authentication
@@ -579,11 +762,10 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
    serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme)

    # # Convert serverblocks to a dictionary keyed by domain name
    # knownblocks = {row.domain: row for row in serverblocks}

    for newblock in blocklist.values():
        log.debug(f"Processing block: {newblock}")

        # Already existing block
        if newblock.domain in serverblocks:
            log.debug(f"Block already exists for {newblock.domain}, checking for differences...")
@@ -612,19 +794,12 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
                blockdata = oldblock.copy()
                blockdata.update(newblock)
                log.debug(f"Block as dict: {blockdata._asdict()}")

                if not dryrun:
                    update_known_block(token, host, blockdata, scheme)
                    # add a pause here so we don't melt the instance
                    time.sleep(API_CALL_DELAY)
                else:
                    log.info("Dry run selected. Not applying changes.")

            else:
                log.debug("No differences detected. Not updating.")
                pass

        else:
            # New block
            # Stamp this record with a private comment, since we're the ones adding it.
            if override_private_comment:
                newblock.private_comment = override_private_comment
@@ -636,16 +811,56 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],

            # Make sure the new block doesn't clobber a domain with followers
            newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity, scheme)
            if not dryrun:
                add_block(token, host, newblock, scheme)
                # add a pause here so we don't melt the instance
                time.sleep(API_CALL_DELAY)
            else:
                log.info("Dry run selected. Not adding block.")

    # Append the existing server blocks to the new blocks. This is needed because
    # Misskey applies blocks as a single all-at-once update, so any existing block
    # left out of the pushed list would otherwise be dropped.
    for existing_block in serverblocks.values():
        if existing_block.domain not in blocklist:
            blocklist.blocks[existing_block.domain] = existing_block

    apply_blocks_misskey(blocklist, host, scheme, token)

def apply_blocks_misskey(blocklist: Blocklist, host: str, scheme: str="https", token: str=None):
    # Rather than updating entries one at a time, split the blocklist into lists
    # of suspended and silenced hosts, then call update-meta once with the new lists.
    suspended_hosts = []
    silenced_hosts = []

    suspend_level = BlockSeverity("suspend")
    silence_level = BlockSeverity("silence")

    for block in blocklist.values():
        if block.severity == suspend_level:
            suspended_hosts.append(block.domain)
        elif block.severity == silence_level:
            silenced_hosts.append(block.domain)

    # Make one big call to update-meta with the new blocks and silences.
    api_path = "/api/admin/update-meta"

    headers = requests_headers(token)

    url = f"{scheme}://{host}{api_path}"

    update_data = {
        "blockedHosts": suspended_hosts,
        "silencedHosts": silenced_hosts
    }

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json=update_data)
    if response.status_code not in (200, 204):
        log.error(f"{response.status_code} Unable to apply blocklist: {response.content}")
        raise ValueError(f"Unable to apply block list: {response}")

    log.info(f"Updated meta with {len(suspended_hosts)} blocks and {len(silenced_hosts)} silences")
    return True

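# Rough illustration of the resulting request (host and domains are placeholders):
#
#     POST https://demo.example/api/admin/update-meta
#     {"blockedHosts": ["spam.example"], "silencedHosts": ["noisy.example"]}
#
# Because update-meta replaces both lists wholesale rather than appending to them,
# push_blocklist() merges the server's existing blocks back into the pushed
# blocklist before calling apply_blocks_misskey().
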
def load_config(configfile: str):
    """Load the configuration file.

    The config file is expected to be in TOML format.
    """
    conf = toml.load(configfile)

@@ -723,7 +938,7 @@ def save_domain_block_audit_to_file(

def augment_args(args, tomldata: str=None):
    """Augment commandline arguments with config file parameters

    If tomldata is provided, uses that data instead of loading
    from a config file.
    """
@@ -749,7 +964,7 @@ def augment_args(args, tomldata: str=None):

    if not args.override_private_comment:
        args.override_private_comment = conf.get('override_private_comment', None)

    if not args.savedir:
        args.savedir = conf.get('savedir', '/tmp')