Merge pull request #24 from eigenmagic/allowlists

Add allowlists
Justin Warren 2023-01-15 17:22:18 +11:00 committed by GitHub
commit 12cbee0e0c
14 changed files with 171 additions and 42 deletions


@ -159,11 +159,12 @@ Or you can use the default location of `/etc/default/fediblockhole.conf.toml`.
As the filename suggests, FediBlockHole uses TOML syntax.
There are 3 key sections:
There are 4 key sections:
1. `blocklist_url_sources`: A list of URLs to read blocklists from
1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API
1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API
1. `allowlist_url_sources`: A list of URLs to read allowlists from
More detail on configuring the tool is provided below.
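A rough sketch of how these four sections fit together in the TOML file is shown
below. The URLs, domain names and token are placeholders, and the keys inside the
instance entries (`domain`, `token`) are illustrative; see the sample configuration
file for the authoritative format.

```toml
# Read blocklists from these URLs
blocklist_url_sources = [
    { url = 'https://example.org/exported-blocklist.csv', format = 'csv' },
]

# Read blocklists from these instances via the API (entry keys are illustrative)
blocklist_instance_sources = [
    { domain = 'other-instance.example' },
]

# Push the merged blocklist to these instances via the API (entry keys are illustrative)
blocklist_instance_destinations = [
    { domain = 'your-instance.example', token = '<admin API token>' },
]

# Read allowlists from these URLs
allowlist_url_sources = [
    { url = 'file:///path/to/allowlist.csv', format = 'csv' },
]
```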
@ -286,6 +287,24 @@ mergeplan.
Once the follow count drops to 0 on your instance, the tool will automatically
use the highest severity it finds again (if you're using the `max` mergeplan).
### Allowlists
Sometimes you might want to completely ignore the blocklist definitions for
certain domains. That's what allowlists are for.
Allowlists remove any domain they contain from the merged list of blocks before
that list is saved to a file or pushed to any instance.
Allowlists can be in any format supported by `blocklist_url_sources`, but every
field other than `domain` is ignored.
You can also allow domains on the commandline by using the `-A` or `--allow`
flag and providing the domain name to allow. You can use the flag multiple
times to allow multiple domains.
It is probably wise to include your own instance domain in an allowlist so you
don't accidentally defederate from yourself.
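For example, an allowlist source entry in the config file mirrors the shape of a
`blocklist_url_sources` entry, so the same formats and parsers apply. The path
below is a placeholder:

```toml
allowlist_url_sources = [
    # A file with at least a `domain` field per entry; all other fields are ignored
    { url = 'file:///path/to/allowlist.csv', format = 'csv' },
]
```

On the commandline, `-A your-instance.example` has the same effect for a single
domain.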
## More advanced configuration
For a list of possible configuration options, check the `--help` and read the


@ -16,7 +16,7 @@ blocklist_instance_sources = [
# max_severity tells the parser to override any severities that are higher than this value
# import_fields tells the parser to only import that set of fields from a specific source
blocklist_url_sources = [
# { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
# { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
{ url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
]
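As the comments above note, an individual URL source can also carry a
`max_severity` cap and an `import_fields` list. A hypothetical entry using both
(the URL and values are purely illustrative) might look like:

```toml
blocklist_url_sources = [
    # Cap severities from this list at 'silence' and restrict which extra fields are imported
    { url = 'https://example.org/blocklist.csv', format = 'csv', max_severity = 'silence', import_fields = ['reject_media', 'reject_reports'] },
]
```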


@ -1,6 +1,6 @@
[project]
name = "fediblockhole"
version = "0.4.0"
version = "0.4.1"
description = "Federated blocklist management for Mastodon"
readme = "README.md"
license = {file = "LICENSE"}

setup.cfg (new file, 2 additions)

@ -0,0 +1,2 @@
[pytest]
norecursedirs=tests/helpers


@ -35,10 +35,13 @@ API_CALL_DELAY = 5 * 60 / 300 # 300 calls per 5 minutes
# We always import the domain and the severity
IMPORT_FIELDS = ['domain', 'severity']
# Allowlists always import these fields
ALLOWLIST_IMPORT_FIELDS = ['domain', 'severity', 'public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate']
# We always export the domain and the severity
EXPORT_FIELDS = ['domain', 'severity']
def sync_blocklists(conf: dict):
def sync_blocklists(conf: argparse.Namespace):
"""Sync instance blocklists from remote sources.
@param conf: The merged configuration, as an argparse.Namespace
@ -69,6 +72,12 @@ def sync_blocklists(conf: dict):
# Merge blocklists into an update dict
merged = merge_blocklists(blocklists, conf.mergeplan)
# Remove items listed in allowlists, if any
allowlists = fetch_allowlists(conf)
merged = apply_allowlists(merged, conf, allowlists)
# Save the final mergelist, if requested
if conf.blocklist_savefile:
log.info(f"Saving merged blocklist to {conf.blocklist_savefile}")
save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields)
@ -82,6 +91,35 @@ def sync_blocklists(conf: dict):
max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)
def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict):
"""Apply allowlists
"""
# Apply allows specified on the commandline
for domain in conf.allow_domains:
log.info(f"'{domain}' allowed by commandline, removing any blocks...")
if domain in merged:
del merged[domain]
# Apply allows from URLs lists
log.info("Removing domains from URL allowlists...")
for key, alist in allowlists.items():
log.debug(f"Processing allows from '{key}'...")
for allowed in alist:
domain = allowed.domain
log.debug(f"Removing allowlisted domain '{domain}' from merged list.")
if domain in merged:
del merged[domain]
return merged
def fetch_allowlists(conf: argparse.Namespace) -> dict:
"""
"""
if conf.allowlist_url_sources:
allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS)
return allowlists
return {}
def fetch_from_urls(blocklists: dict, url_sources: dict,
import_fields: list=IMPORT_FIELDS,
save_intermediate: bool=False,
@ -142,9 +180,12 @@ def fetch_from_instances(blocklists: dict, sources: dict,
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
"""Merge fetched remote blocklists into a bulk update
@param blocklists: A dict of lists of DomainBlocks, keyed by source.
Each value is a list of DomainBlocks
@param mergeplan: An optional method of merging overlapping block definitions
'max' (the default) uses the highest severity block found
'min' uses the lowest severity block found
@param returns: A dict of DomainBlocks keyed by domain
"""
merged = {}
@ -435,7 +476,7 @@ def update_known_block(token: str, host: str, block: DomainBlock):
response = requests.put(url,
headers=requests_headers(token),
data=blockdata,
json=blockdata._asdict(),
timeout=REQUEST_TIMEOUT
)
if response.status_code != 200:
@ -444,14 +485,14 @@ def update_known_block(token: str, host: str, block: DomainBlock):
def add_block(token: str, host: str, blockdata: DomainBlock):
"""Block a domain on Mastodon host
"""
log.debug(f"Blocking domain {blockdata.domain} at {host}...")
log.debug(f"Adding block entry for {blockdata.domain} at {host}...")
api_path = "/api/v1/admin/domain_blocks"
url = f"https://{host}{api_path}"
response = requests.post(url,
headers=requests_headers(token),
data=blockdata._asdict(),
json=blockdata._asdict(),
timeout=REQUEST_TIMEOUT
)
if response.status_code == 422:
@ -517,6 +558,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
log.info(f"Pushing new block definition: {newblock}")
blockdata = oldblock.copy()
blockdata.update(newblock)
log.debug(f"Block as dict: {blockdata._asdict()}")
if not dryrun:
update_known_block(token, host, blockdata)
# add a pause here so we don't melt the instance
@ -532,6 +575,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
# This is a new block for the target instance, so we
# need to add a block rather than update an existing one
log.info(f"Adding new block: {newblock}...")
log.debug(f"Block as dict: {newblock._asdict()}")
# Make sure the new block doesn't clobber a domain with followers
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
@ -629,6 +673,7 @@ def augment_args(args, tomldata: str=None):
args.blocklist_url_sources = conf.get('blocklist_url_sources', [])
args.blocklist_instance_sources = conf.get('blocklist_instance_sources', [])
args.allowlist_url_sources = conf.get('allowlist_url_sources', [])
args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', [])
return args
@ -650,6 +695,7 @@ def setup_argparse():
ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.")
ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.")
ap.add_argument('-A', '--allow', dest="allow_domains", action='append', default=[], help="Override any blocks to allow this domain.")
ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.")
ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.")


@ -97,9 +97,10 @@ class BlocklistParserCSV(BlocklistParser):
origitem = blockitem.copy()
for key in origitem:
if key not in self.import_fields:
log.debug(f"ignoring field '{key}'")
del blockitem[key]
# Convert dict to NamedTuple with the double-star operator
# Convert dict to DomainBlock with the double-star operator
# See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments
block = DomainBlock(**blockitem)
if block.severity > self.max_severity:
@ -162,7 +163,7 @@ def str2bool(boolstring: str) -> bool:
boolstring = boolstring.lower()
if boolstring in ['true', 't', '1', 'y', 'yes']:
return True
elif boolstring in ['false', 'f', '0', 'n', 'no']:
elif boolstring in ['', 'false', 'f', '0', 'n', 'no']:
return False
else:
raise ValueError(f"Cannot parse value '{boolstring}' as boolean")
@ -183,4 +184,5 @@ def parse_blocklist(
"""Parse a blocklist in the given format
"""
parser = FORMAT_PARSERS[format](import_fields, max_severity)
log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...")
return parser.parse_blocklist(blockdata)


@ -127,13 +127,13 @@ class DomainBlock(object):
"""Initialize the DomainBlock
"""
self.domain = domain
self.severity = severity
self.public_comment = public_comment
self.private_comment = private_comment
self.reject_media = reject_media
self.reject_reports = reject_reports
self.obfuscate = obfuscate
self.id = id
self.severity = severity
@property
def severity(self):
@ -146,17 +146,12 @@ class DomainBlock(object):
else:
self._severity = BlockSeverity(sev)
# Suspend implies reject_media,reject_reports == True
if self._severity.level == SeverityLevel.SUSPEND:
self.reject_media = True
self.reject_reports = True
def _asdict(self):
"""Return a dict version of this object
"""
dictval = {
'domain': self.domain,
'severity': self.severity,
'severity': str(self.severity),
'public_comment': self.public_comment,
'private_comment': self.private_comment,
'reject_media': self.reject_media,

tests/conftest.py (new file, 3 additions)

@ -0,0 +1,3 @@
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), 'helpers'))


tests/helpers/util.py (new file, 11 additions)

@ -0,0 +1,11 @@
""" Utility functions for tests
"""
from fediblockhole import setup_argparse, augment_args
def shim_argparse(testargv: list=[], tomldata: str=None):
"""Helper function to parse test args
"""
ap = setup_argparse()
args = ap.parse_args(testargv)
args = augment_args(args, tomldata)
return args

tests/test_allowlist.py (new file, 49 additions)

@ -0,0 +1,49 @@
""" Test allowlists
"""
import pytest
from util import shim_argparse
from fediblockhole.const import DomainBlock
from fediblockhole import fetch_allowlists, apply_allowlists
def test_cmdline_allow_removes_domain():
"""Test that -A <domain> removes entries from merged
"""
conf = shim_argparse(['-A', 'removeme.org'])
merged = {
'example.org': DomainBlock('example.org'),
'example2.org': DomainBlock('example2.org'),
'removeme.org': DomainBlock('removeme.org'),
'keepblockingme.org': DomainBlock('keepblockingme.org'),
}
# allowlists = {
# 'testlist': [ DomainBlock('removeme.org', 'noop'), ]
# }
merged = apply_allowlists(merged, conf, {})
with pytest.raises(KeyError):
merged['removeme.org']
def test_allowlist_removes_domain():
"""Test that an item in an allowlist removes entries from merged
"""
conf = shim_argparse()
merged = {
'example.org': DomainBlock('example.org'),
'example2.org': DomainBlock('example2.org'),
'removeme.org': DomainBlock('removeme.org'),
'keepblockingme.org': DomainBlock('keepblockingme.org'),
}
allowlists = {
'testlist': [ DomainBlock('removeme.org', 'noop'), ]
}
merged = apply_allowlists(merged, conf, allowlists)
with pytest.raises(KeyError):
merged['removeme.org']


@ -1,15 +1,8 @@
"""Test the commandline defined parameters correctly
"""
from util import shim_argparse
from fediblockhole import setup_argparse, augment_args
def shim_argparse(testargv: list=[], tomldata: str=None):
"""Helper function to parse test args
"""
ap = setup_argparse()
args = ap.parse_args(testargv)
args = augment_args(args, tomldata)
return args
def test_cmdline_no_configfile():
""" Test bare command with no configfile
"""
@ -38,3 +31,17 @@ def test_cmdline_mergeplan_min():
args = ap.parse_args(['-m', 'min'])
assert args.mergeplan == 'min'
def test_set_allow_domain():
"""Set a single allow domain on commandline"""
ap = setup_argparse()
args = ap.parse_args(['-A', 'example.org'])
assert args.allow_domains == ['example.org']
def test_set_multiple_allow_domains():
"""Set multiple allow domains on commandline"""
ap = setup_argparse()
args = ap.parse_args(['-A', 'example.org', '-A', 'example2.org', '-A', 'example3.org'])
assert args.allow_domains == ['example.org', 'example2.org', 'example3.org']


@ -1,15 +1,8 @@
"""Test the config file is loading parameters correctly
"""
from util import shim_argparse
from fediblockhole import setup_argparse, augment_args
def shim_argparse(testargv: list=[], tomldata: str=None):
"""Helper function to parse test args
"""
ap = setup_argparse()
args = ap.parse_args(testargv)
args = augment_args(args, tomldata)
return args
def test_parse_tomldata():
tomldata = """
# Test TOML config for FediBlockHole
@ -45,3 +38,14 @@ def test_set_mergeplan_min():
assert args.mergeplan == 'min'
def test_set_allowlists():
tomldata = """# Comment on config
allowlist_url_sources = [ { url='file:///path/to/allowlist', format='csv'} ]
"""
args = shim_argparse([], tomldata)
assert args.mergeplan == 'max'
assert args.allowlist_url_sources == [{
'url': 'file:///path/to/allowlist',
'format': 'csv',
}]


@ -72,12 +72,3 @@ def test_compare_diff_sevs_2():
b = DomainBlock('example1.org', 'noop')
assert a != b
def test_suspend_rejects():
"""A suspend should reject_media and reject_reports
"""
a = DomainBlock('example.org', 'suspend')
assert a.severity.level == SeverityLevel.SUSPEND
assert a.reject_media == True
assert a.reject_reports == True