Compare commits

...

10 Commits

Author SHA1 Message Date
projectmoon 28004d3c0b support misskey blocks 2023-11-30 12:03:51 +01:00
Justin Warren 4d12bac5a4 Merge pull request #59 from sgrigson/override-private-comment (Override private comment) 2023-10-03 08:41:16 +11:00
Shawn Grigson 58dbed0fa8 some updates to wording/help for override_private_comment 2023-10-02 13:48:13 -05:00
Shawn Grigson 0a63a47ff1 more revert 2023-09-12 08:14:53 -05:00
Shawn Grigson 3592c97627 revert 2023-09-12 08:13:17 -05:00
Shawn Grigson 4d360e6b53 block more errors for 401 2023-09-12 02:43:02 -05:00
Shawn Grigson a1f81d197f Fix 401 for add block, too 2023-09-12 02:39:16 -05:00
Shawn Grigson d1fe11abf5 readme updates also 401 status_code becomes skippable 2023-09-12 02:34:56 -05:00
Shawn Grigson bda8bf5ebb meh 2023-09-10 19:38:09 -05:00
Shawn Grigson c372b210d8 tweak 2023-09-10 19:34:50 -05:00
3 changed files with 257 additions and 36 deletions

View File

@@ -386,6 +386,14 @@ Skip the fetching of blocklists from any URLs that are configured.
Skip the fetching of blocklists from any remote instances that are configured.
### override_private_comment
Defaults to None.
Stamp all *new* blocks pushed to a remote server with this comment or code.
Helps to identify blocks you've created on a server via Fediblockhole versus ones that
already existed.
### mergeplan

If two (or more) blocklists define blocks for the same domain, but they're
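
The option documented above is only applied in the new-block branch of `push_blocklist` (shown later in this diff): blocks that already exist on the target server keep their private comments. A minimal sketch of that behaviour, using a simplified stand-in for Fediblockhole's `DomainBlock` class; the `stamp_new_blocks` helper and the dataclass below are illustrative only, not part of the project:

# Illustrative sketch only: simplified stand-in for DomainBlock and a
# hypothetical helper showing how override_private_comment stamps *new*
# blocks while leaving pre-existing remote blocks untouched.
from dataclasses import dataclass

@dataclass
class DomainBlock:
    domain: str
    severity: str = 'suspend'
    private_comment: str = ''

def stamp_new_blocks(blocks, existing_domains, override_private_comment=None):
    """Apply the override comment only to blocks the remote server doesn't already have."""
    for block in blocks:
        if override_private_comment and block.domain not in existing_domains:
            block.private_comment = override_private_comment
    return blocks

blocks = [DomainBlock('newly-blocked.example'), DomainBlock('already-blocked.example')]
stamp_new_blocks(blocks, existing_domains={'already-blocked.example'},
                 override_private_comment='Added by Fediblockhole')
print([(b.domain, b.private_comment) for b in blocks])
# [('newly-blocked.example', 'Added by Fediblockhole'), ('already-blocked.example', '')]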

View File

@@ -77,8 +77,9 @@ blocklist_instance_destinations = [
# merge_threshold_type = 'count'
# merge_threshold = 0

## set an override private comment
# override_private_comment = 'Updated by Fediblockhole'
## set an override private comment to be added when pushing a NEW block to an instance
# this does not require importing private comments
# override_private_comment = 'Added by Fediblock Sync'

## Set which fields we import
## 'domain' and 'severity' are always imported, these are additional

View File

@@ -5,6 +5,7 @@ import argparse
import toml
import csv
import requests
import re
import json
import time
import os.path
@@ -148,7 +149,7 @@ def fetch_from_urls(url_sources: dict,
        blocklists.append(bl)
        if save_intermediate:
            save_intermediate_blocklist(bl, savedir, export_fields)
    return blocklists

def fetch_from_instances(sources: dict,
@@ -245,7 +246,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
        if save_block_audit_file:
            blockdata:BlockAudit = {
                'domain': domain,
                'count': domain_matches_count,
                'percent': domain_matches_percent,
            }
            audit.blocks[domain] = blockdata
@@ -258,7 +259,7 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str='max') -> dict:
    """Use a mergeplan to decide how to merge two overlapping block definitions

    @param oldblock: The existing block definition.
    @param newblock: The new block definition we want to merge in.
    @param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
@@ -276,7 +277,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
        except KeyError:
            log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
            continue

    # How do we override an earlier block definition?
    if mergeplan in ['max', None]:
        # Use the highest block level found (the default)
@@ -285,7 +286,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
        if newblock.severity > oldblock.severity:
            # log.debug(f"New block severity is higher. Using that.")
            blockdata['severity'] = newblock.severity

        # For 'reject_media', 'reject_reports', and 'obfuscate' if
        # the value is set and is True for the domain in
        # any blocklist then the value is set to True.
@@ -345,7 +346,7 @@ def merge_comments(oldcomment:str, newcomment:str) -> str:
    # "boring, lack of moderation, nazis, scrapers"
    old_tokens = oldcomment.split(', ')
    new_tokens = newcomment.split(', ')

    # Remove any empty string tokens that we get
    while '' in old_tokens:
        old_tokens.remove('')
@@ -374,7 +375,141 @@ def requests_headers(token: str=None):
    return headers

def get_nodeinfo(server, token: str=None, host_meta_fallback = False):
    headers = requests_headers(token)
    url = f'https://{server}/.well-known/nodeinfo'
    try:
        resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None

    # if well-known nodeinfo isn't found, try to check host-meta for a webfinger URL
    # needed on servers where the display domain is different than the web domain
    if resp.status_code != 200 and not host_meta_fallback:
        # not found, try to check host-meta as a fallback
        log.warning(f'nodeinfo for {server} not found, checking host-meta')
        new_server = get_server_from_host_meta(server)
        if new_server is not None:
            if new_server == server:
                log.debug(f'host-meta for {server} did not get a new server.')
                return None
            else:
                # pass the token through and retry with host-meta fallback enabled
                return get_nodeinfo(new_server, token, host_meta_fallback=True)
        else:
            return None

    if resp.status_code == 200:
        nodeLoc = None
        try:
            nodeInfo = resp.json()
            for link in nodeInfo['links']:
                if link['rel'] in [
                    'http://nodeinfo.diaspora.software/ns/schema/2.0',
                    'http://nodeinfo.diaspora.software/ns/schema/2.1',
                ]:
                    nodeLoc = link['href']
                    break
        except Exception as ex:
            log.error(f'error getting server {server} info from well-known node info. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting well-known host node info for {server}. Status Code: {resp.status_code}')
        return None

    if nodeLoc is None:
        log.warning(f'could not find link to node info in well-known nodeinfo of {server}')
        return None

    # regrab server from nodeLoc, again in the case of different display and web domains
    match = re.match(
        r"https://(?P<server>[^/]+)/", nodeLoc
    )
    if match is None:
        log.error(f"Error getting web server name from {server}.")
        return None
    server = match.group('server')

    try:
        resp = requests.get(nodeLoc, headers=headers, timeout=REQUEST_TIMEOUT)
    except Exception as ex:
        log.error(f"Error getting host node info for {server}. Exception: {ex}")
        return None

    if resp.status_code == 200:
        try:
            nodeInfo = resp.json()
            if 'activitypub' not in nodeInfo['protocols']:
                log.warning(f'server {server} does not support activitypub, skipping')
                return None
            return {
                'webserver': server,
                'software': nodeInfo['software']['name'],
                'version': nodeInfo['software']['version'],
                'rawnodeinfo': nodeInfo,
            }
        except Exception as ex:
            log.error(f'error getting server {server} info from nodeinfo. Exception: {ex}')
            return None
    else:
        log.error(f'Error getting host node info for {server}. Status Code: {resp.status_code}')
        return None

def set_server_apis(server):
    # support for new server software should be added here
    software_apis = {
        'mastodonApiSupport': ['mastodon', 'pleroma', 'akkoma', 'pixelfed', 'hometown', 'iceshrimp'],
        'misskeyApiSupport': ['misskey', 'calckey', 'firefish', 'foundkey', 'sharkey'],
        'lemmyApiSupport': ['lemmy']
    }
    for api, softwareList in software_apis.items():
        server[api] = server['software'] in softwareList

    # search `features` list in metadata if available
    if 'metadata' in server['rawnodeinfo'] and 'features' in server['rawnodeinfo']['metadata'] and type(server['rawnodeinfo']['metadata']['features']) is list:
        features = server['rawnodeinfo']['metadata']['features']
        if 'mastodon_api' in features:
            server['mastodonApiSupport'] = True

def fetch_server_info(server, token: str=None):
    nodeinfo = get_nodeinfo(server, token=token)
    if nodeinfo is None:
        return None
    else:
        set_server_apis(nodeinfo)
        return nodeinfo

def fetch_api_type(host: str, token: str=None) -> str:
    server_info = fetch_server_info(host, token)
    # Many Misskey-family servers also support the Mastodon API to varying
    # extents, so we should check Misskey API support before checking
    # Mastodon API support.
    if server_info['misskeyApiSupport']:
        return "misskey"
    elif server_info['mastodonApiSupport']:
        return "mastodon"
    else:
        sys.exit(f"Unknown API type for {host}")

def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
                             import_fields: list=['domain', 'severity'],
                             scheme: str='https') -> list[DomainBlock]:
    """
    Fetch existing blocklist from server (Misskey or Mastodon)
    """
    api_type = fetch_api_type(host)
    if api_type == 'mastodon':
        return fetch_instance_blocklist_mastodon(host, token, admin, import_fields, scheme)
    elif api_type == 'misskey':
        return fetch_instance_blocklist_misskey(host, token, admin, import_fields, scheme)
    else:
        sys.exit(f"Unknown API type for server {host}")

def fetch_instance_blocklist_mastodon(host: str, token: str=None, admin: bool=False,
                                      import_fields: list=['domain', 'severity'],
                                      scheme: str='https') -> list[DomainBlock]:
    """Fetch existing block list from server
@@ -385,7 +520,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
    @param import_fields: A list of fields to import from the remote instance.
    @returns: A list of the domain blocks from the instance.
    """
    log.info(f"Fetching instance blocklist from {host} ...")
    log.info(f"Fetching instance blocklist (Mastodon) from {host} ...")

    if admin:
        api_path = "/api/v1/admin/domain_blocks"
@@ -424,7 +559,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
        else:
            next = pagination[0]
            # prev = pagination[1]

            urlstring, rel = next.split('; ')
            url = urlstring.strip('<').rstrip('>')
@@ -432,6 +567,54 @@
    return blocklist

def fetch_instance_blocklist_misskey(host: str, token: str=None, admin: bool=False,
                                     import_fields: list=['domain', 'severity'],
                                     scheme: str='https') -> list[DomainBlock]:
    """Fetch existing block list from server

    @param host: The remote host to connect to.
    @param token: The (required) OAuth Bearer token to authenticate with.
    @param admin: Boolean flag to use the admin API if True.
    @param import_fields: A list of fields to import from the remote instance.
    @returns: A list of the domain blocks from the instance.
    """
    log.info(f"Fetching instance blocklist (Misskey) from {host} ...")

    if admin:
        api_path = "/api/admin/meta"
        parse_format = 'json'
    else:
        sys.exit("Must be an admin to use the meta API")

    headers = requests_headers(token)
    url = f"{scheme}://{host}{api_path}"
    blockdata = []

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json={})
    if response.status_code != 200:
        log.error(f"{response.status_code} Cannot fetch remote blocklist: {response.content}")
        raise ValueError("Unable to fetch domain block list: %s", response)

    # Convert the Misskey blocks (which are just domains) into
    # something similar to the Mastodon response that Fediblockhole
    # can understand.
    def map_block(domain, severity):
        return { "domain": domain, "digest": "", "severity": severity, "comment": "" }

    meta = json.loads(response.content.decode('utf-8'))
    blocked_hosts = [ map_block(domain, "suspend") for domain in meta['blockedHosts'] ]
    silenced_hosts = [ map_block(domain, "silence") for domain in meta['silencedHosts'] ]
    blockdata.extend(blocked_hosts)
    blockdata.extend(silenced_hosts)

    blocklist = parse_blocklist(blockdata, url, parse_format, import_fields)

    return blocklist

def delete_block(token: str, host: str, id: int, scheme: str='https'):
    """Remove a domain block"""
    log.debug(f"Removing domain block {id} at {host}...")
@@ -552,9 +735,9 @@ def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str='https'
            log.warning(err['error'])

    elif response.status_code != 200:
        raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")

def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
                   dryrun: bool=False,
                   import_fields: list=['domain', 'severity'],
@@ -563,7 +746,7 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
                   override_private_comment: str=None
                   ):
    """Push a blocklist to a remote instance.

    Updates existing entries if they exist, creates new blocks if they don't.

    @param token: The Bearer token for OAUTH API authentication
@@ -579,11 +762,10 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
    serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme)

    # # Convert serverblocks to a dictionary keyed by domain name
    # knownblocks = {row.domain: row for row in serverblocks}

    for newblock in blocklist.values():

        log.debug(f"Processing block: {newblock}")
        # Already existing block
        if newblock.domain in serverblocks:
            log.debug(f"Block already exists for {newblock.domain}, checking for differences...")
@@ -612,19 +794,12 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
                blockdata = oldblock.copy()
                blockdata.update(newblock)
                log.debug(f"Block as dict: {blockdata._asdict()}")

                if not dryrun:
                    update_known_block(token, host, blockdata, scheme)
                    # add a pause here so we don't melt the instance
                    time.sleep(API_CALL_DELAY)
                else:
                    log.info("Dry run selected. Not applying changes.")

            else:
                log.debug("No differences detected. Not updating.")
                pass

        else:
            # New block
            # stamp this record with a private comment, since we're the ones adding it
            if override_private_comment:
                newblock.private_comment = override_private_comment
@@ -636,19 +811,56 @@ def push_blocklist(token: str, host: str, blocklist: list[DomainBlock],
            # Make sure the new block doesn't clobber a domain with followers
            newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity, scheme)

            if not dryrun:
                add_block(token, host, newblock, scheme)
                # add a pause here so we don't melt the instance
                time.sleep(API_CALL_DELAY)
            else:
                log.info("Dry run selected. Not adding block.")
    for block in serverblocks:
        log.debug(f"Checking block: {block}")

    # Append existing blocks to the new blocks (needed because Misskey
    # is an all-at-once update operation)
    for existing_block in serverblocks.values():
        if existing_block.domain not in blocklist:
            blocklist.blocks[existing_block.domain] = existing_block

    apply_blocks_misskey(blocklist, host, scheme, token)

def apply_blocks_misskey(blocklist: Blocklist, host:str, scheme: str="https", token: str=None):
    # instead of being cool and trying to update everything, split
    # into lists of suspends and silences, then call update-meta with
    # these new lists.
    suspended_hosts = []
    silenced_hosts = []
    suspend_level = BlockSeverity("suspend")
    silence_level = BlockSeverity("silence")

    for block in blocklist.values():
        if block.severity == suspend_level:
            suspended_hosts.append(block.domain)
        elif block.severity == silence_level:
            silenced_hosts.append(block.domain)

    # Make one big call to update-meta with the new suspends and silences
    api_path = "/api/admin/update-meta"
    parse_format = 'json'

    headers = requests_headers(token)
    url = f"{scheme}://{host}{api_path}"

    update_data = {
        "blockedHosts": suspended_hosts,
        "silencedHosts": silenced_hosts
    }

    response = requests.post(url, headers=headers, timeout=REQUEST_TIMEOUT, json=update_data)
    if response.status_code != 200 and response.status_code != 204:
        log.error(f"{response.status_code} Unable to apply blocklist: {response.content}")
        raise ValueError("Unable to apply block list: %s", response)

    log.info(f"Updated meta with {len(suspended_hosts)} blocks and {len(silenced_hosts)} silences")
    return True

def load_config(configfile: str):
    """Augment commandline arguments with config file parameters

    Config file is expected to be in TOML format
    """
    conf = toml.load(configfile)
@@ -726,7 +938,7 @@ def save_domain_block_audit_to_file(
def augment_args(args, tomldata: str=None):
    """Augment commandline arguments with config file parameters

    If tomldata is provided, uses that data instead of loading
    from a config file.
    """
@@ -752,7 +964,7 @@ def augment_args(args, tomldata: str=None):
    if not args.override_private_comment:
        args.override_private_comment = conf.get('override_private_comment', None)

    if not args.savedir:
        args.savedir = conf.get('savedir', '/tmp')
@@ -798,7 +1010,7 @@ def setup_argparse():
    ap.add_argument('-b', '--block-audit-file', dest="blocklist_auditfile", help="Save blocklist auditfile to this location.")
    ap.add_argument('--merge-threshold', type=int, help="Merge threshold value")
    ap.add_argument('--merge-threshold-type', choices=['count', 'pct'], help="Type of merge threshold to use.")
    ap.add_argument('--override-private-comment', dest='override_private_comment', help="Enforces a private comment for all blocks.")
    ap.add_argument('--override-private-comment', dest='override_private_comment', help="Override private_comment with this string for new blocks when pushing blocklists.")
    ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.")
    ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.")