diff --git a/README.md b/README.md index d93b537..4e28482 100644 --- a/README.md +++ b/README.md @@ -159,11 +159,12 @@ Or you can use the default location of `/etc/default/fediblockhole.conf.toml`. As the filename suggests, FediBlockHole uses TOML syntax. -There are 3 key sections: +There are 4 key sections: 1. `blocklist_urls_sources`: A list of URLs to read blocklists from 1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API 1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API + 1. `allowlist_url_sources`: A list of URLs to read allowlists from More detail on configuring the tool is provided below. @@ -286,6 +287,24 @@ mergeplan. Once the follow count drops to 0 on your instance, the tool will automatically use the highest severity it finds again (if you're using the `max` mergeplan). +### Allowlists + +Sometimes you might want to completely ignore the blocklist definitions for +certain domains. That's what allowlists are for. + +Allowlists remove any domain in the list from the merged list of blocks before +the merged list is saved out to a file or pushed to any instance. + +Allowlists can be in any format supported by `blocklist_url_sources` but ignore +all fields that aren't `domain`. + +You can also allow domains on the commandline by using the `-A` or `--allow` +flag and providing the domain name to allow. You can use the flag multiple +times to allow multiple domains. + +It is probably wise to include your own instance domain in an allowlist so you +don't accidentally defederate from yourself. 
+ ## More advanced configuration For a list of possible configuration options, check the `--help` and read the diff --git a/etc/sample.fediblockhole.conf.toml b/etc/sample.fediblockhole.conf.toml index 637dde2..5190d25 100644 --- a/etc/sample.fediblockhole.conf.toml +++ b/etc/sample.fediblockhole.conf.toml @@ -16,7 +16,7 @@ blocklist_instance_sources = [ # max_severity tells the parser to override any severities that are higher than this value # import_fields tells the parser to only import that set of fields from a specific source blocklist_url_sources = [ - # { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, + # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' }, ] diff --git a/pyproject.toml b/pyproject.toml index 24f9aff..f982ffe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fediblockhole" -version = "0.4.0" +version = "0.4.1" description = "Federated blocklist management for Mastodon" readme = "README.md" license = {file = "LICENSE"} diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6d8a5af --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[tool:pytest] +norecursedirs=tests/helpers \ No newline at end of file diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index e4197c7..945e29c 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -35,10 +35,13 @@ API_CALL_DELAY = 5 * 60 / 300 # 300 calls per 5 minutes # We always import the domain and the severity IMPORT_FIELDS = ['domain', 'severity'] +# Allowlists always import these fields +ALLOWLIST_IMPORT_FIELDS = ['domain', 'severity', 'public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate'] + # We always export the domain and the severity EXPORT_FIELDS = ['domain', 'severity'] -def 
sync_blocklists(conf: dict): +def sync_blocklists(conf: argparse.Namespace): """Sync instance blocklists from remote sources. @param conf: A configuration dictionary @@ -69,6 +72,12 @@ def sync_blocklists(conf: dict): # Merge blocklists into an update dict merged = merge_blocklists(blocklists, conf.mergeplan) + + # Remove items listed in allowlists, if any + allowlists = fetch_allowlists(conf) + merged = apply_allowlists(merged, conf, allowlists) + + # Save the final mergelist, if requested if conf.blocklist_savefile: log.info(f"Saving merged blocklist to {conf.blocklist_savefile}") save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields) @@ -82,6 +91,35 @@ def sync_blocklists(conf: dict): max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence')) push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity) +def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict): + """Apply allowlists + """ + # Apply allows specified on the commandline + for domain in conf.allow_domains: + log.info(f"'{domain}' allowed by commandline, removing any blocks...") + if domain in merged: + del merged[domain] + + # Apply allows from URLs lists + log.info("Removing domains from URL allowlists...") + for key, alist in allowlists.items(): + log.debug(f"Processing allows from '{key}'...") + for allowed in alist: + domain = allowed.domain + log.debug(f"Removing allowlisted domain '{domain}' from merged list.") + if domain in merged: + del merged[domain] + + return merged + +def fetch_allowlists(conf: argparse.Namespace) -> dict: + """ + """ + if conf.allowlist_url_sources: + allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) + return allowlists + return {} + def fetch_from_urls(blocklists: dict, url_sources: dict, import_fields: list=IMPORT_FIELDS, save_intermediate: bool=False, @@ -142,9 +180,12 @@ def fetch_from_instances(blocklists: dict, sources: 
dict, def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict: """Merge fetched remote blocklists into a bulk update + @param blocklists: A dict of lists of DomainBlocks, keyed by source. + Each value is a list of DomainBlocks @param mergeplan: An optional method of merging overlapping block definitions 'max' (the default) uses the highest severity block found 'min' uses the lowest severity block found + @param returns: A dict of DomainBlocks keyed by domain """ merged = {} @@ -435,7 +476,7 @@ def update_known_block(token: str, host: str, block: DomainBlock): response = requests.put(url, headers=requests_headers(token), - data=blockdata, + json=blockdata._asdict(), timeout=REQUEST_TIMEOUT ) if response.status_code != 200: @@ -444,14 +485,14 @@ def update_known_block(token: str, host: str, block: DomainBlock): def add_block(token: str, host: str, blockdata: DomainBlock): """Block a domain on Mastodon host """ - log.debug(f"Blocking domain {blockdata.domain} at {host}...") + log.debug(f"Adding block entry for {blockdata.domain} at {host}...") api_path = "/api/v1/admin/domain_blocks" url = f"https://{host}{api_path}" response = requests.post(url, headers=requests_headers(token), - data=blockdata._asdict(), + json=blockdata._asdict(), timeout=REQUEST_TIMEOUT ) if response.status_code == 422: @@ -517,6 +558,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], log.info(f"Pushing new block definition: {newblock}") blockdata = oldblock.copy() blockdata.update(newblock) + log.debug(f"Block as dict: {blockdata._asdict()}") + if not dryrun: update_known_block(token, host, blockdata) # add a pause here so we don't melt the instance @@ -532,6 +575,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], # This is a new block for the target instance, so we # need to add a block rather than update an existing one log.info(f"Adding new block: {newblock}...") + log.debug(f"Block as dict: {newblock._asdict()}") # Make sure the new block 
doesn't clobber a domain with followers newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity) @@ -629,6 +673,7 @@ def augment_args(args, tomldata: str=None): args.blocklist_url_sources = conf.get('blocklist_url_sources', []) args.blocklist_instance_sources = conf.get('blocklist_instance_sources', []) + args.allowlist_url_sources = conf.get('allowlist_url_sources', []) args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', []) return args @@ -650,6 +695,7 @@ def setup_argparse(): ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.") ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.") + ap.add_argument('-A', '--allow', dest="allow_domains", action='append', default=[], help="Override any blocks to allow this domain.") ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.") ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.") diff --git a/src/fediblockhole/blocklist_parser.py b/src/fediblockhole/blocklist_parser.py index 38b0b59..d5d8394 100644 --- a/src/fediblockhole/blocklist_parser.py +++ b/src/fediblockhole/blocklist_parser.py @@ -97,9 +97,10 @@ class BlocklistParserCSV(BlocklistParser): origitem = blockitem.copy() for key in origitem: if key not in self.import_fields: + log.debug(f"ignoring field '{key}'") del blockitem[key] - # Convert dict to NamedTuple with the double-star operator + # Convert dict to DomainBlock with the double-star operator # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments block = DomainBlock(**blockitem) if block.severity > self.max_severity: @@ -162,7 +163,7 @@ def str2bool(boolstring: str) -> bool: boolstring = 
boolstring.lower() if boolstring in ['true', 't', '1', 'y', 'yes']: return True - elif boolstring in ['false', 'f', '0', 'n', 'no']: + elif boolstring in ['', 'false', 'f', '0', 'n', 'no']: return False else: raise ValueError(f"Cannot parse value '{boolstring}' as boolean") @@ -183,4 +184,5 @@ def parse_blocklist( """Parse a blocklist in the given format """ parser = FORMAT_PARSERS[format](import_fields, max_severity) + log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...") return parser.parse_blocklist(blockdata) \ No newline at end of file diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py index 909d84d..93cf2ef 100644 --- a/src/fediblockhole/const.py +++ b/src/fediblockhole/const.py @@ -127,13 +127,13 @@ class DomainBlock(object): """Initialize the DomainBlock """ self.domain = domain + self.severity = severity self.public_comment = public_comment self.private_comment = private_comment self.reject_media = reject_media self.reject_reports = reject_reports self.obfuscate = obfuscate self.id = id - self.severity = severity @property def severity(self): @@ -146,17 +146,12 @@ class DomainBlock(object): else: self._severity = BlockSeverity(sev) - # Suspend implies reject_media,reject_reports == True - if self._severity.level == SeverityLevel.SUSPEND: - self.reject_media = True - self.reject_reports = True - def _asdict(self): """Return a dict version of this object """ dictval = { 'domain': self.domain, - 'severity': self.severity, + 'severity': str(self.severity), 'public_comment': self.public_comment, 'private_comment': self.private_comment, 'reject_media': self.reject_media, diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..501ed17 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), 'helpers')) \ No newline at end of file diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py new file mode 
100644 index 0000000..e69de29 diff --git a/tests/helpers/util.py b/tests/helpers/util.py new file mode 100644 index 0000000..faed6e1 --- /dev/null +++ b/tests/helpers/util.py @@ -0,0 +1,11 @@ +""" Utility functions for tests +""" +from fediblockhole import setup_argparse, augment_args + +def shim_argparse(testargv: list=[], tomldata: str=None): + """Helper function to parse test args + """ + ap = setup_argparse() + args = ap.parse_args(testargv) + args = augment_args(args, tomldata) + return args \ No newline at end of file diff --git a/tests/test_allowlist.py b/tests/test_allowlist.py new file mode 100644 index 0000000..e632361 --- /dev/null +++ b/tests/test_allowlist.py @@ -0,0 +1,49 @@ +""" Test allowlists +""" +import pytest + +from util import shim_argparse +from fediblockhole.const import DomainBlock +from fediblockhole import fetch_allowlists, apply_allowlists + +def test_cmdline_allow_removes_domain(): + """Test that -A removes entries from merged + """ + conf = shim_argparse(['-A', 'removeme.org']) + + merged = { + 'example.org': DomainBlock('example.org'), + 'example2.org': DomainBlock('example.org'), + 'removeme.org': DomainBlock('removeme.org'), + 'keepblockingme.org': DomainBlock('keepblockingme.org'), + } + + # allowlists = { + # 'testlist': [ DomainBlock('removeme.org', 'noop'), ] + # } + + merged = apply_allowlists(merged, conf, {}) + + with pytest.raises(KeyError): + merged['removeme.org'] + +def test_allowlist_removes_domain(): + """Test that an item in an allowlist removes entries from merged + """ + conf = shim_argparse() + + merged = { + 'example.org': DomainBlock('example.org'), + 'example2.org': DomainBlock('example.org'), + 'removeme.org': DomainBlock('removeme.org'), + 'keepblockingme.org': DomainBlock('keepblockingme.org'), + } + + allowlists = { + 'testlist': [ DomainBlock('removeme.org', 'noop'), ] + } + + merged = apply_allowlists(merged, conf, allowlists) + + with pytest.raises(KeyError): + merged['removeme.org'] diff --git 
a/tests/test_cmdline.py b/tests/test_cmdline.py index ed63349..46b5748 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -1,15 +1,8 @@ """Test the commandline defined parameters correctly """ +from util import shim_argparse from fediblockhole import setup_argparse, augment_args -def shim_argparse(testargv: list=[], tomldata: str=None): - """Helper function to parse test args - """ - ap = setup_argparse() - args = ap.parse_args(testargv) - args = augment_args(args, tomldata) - return args - def test_cmdline_no_configfile(): """ Test bare command with no configfile """ @@ -37,4 +30,18 @@ def test_cmdline_mergeplan_min(): ap = setup_argparse() args = ap.parse_args(['-m', 'min']) - assert args.mergeplan == 'min' \ No newline at end of file + assert args.mergeplan == 'min' + +def test_set_allow_domain(): + """Set a single allow domain on commandline""" + ap = setup_argparse() + args = ap.parse_args(['-A', 'example.org']) + + assert args.allow_domains == ['example.org'] + +def test_set_multiple_allow_domains(): + """Set multiple allow domains on commandline""" + ap = setup_argparse() + args = ap.parse_args(['-A', 'example.org', '-A', 'example2.org', '-A', 'example3.org']) + + assert args.allow_domains == ['example.org', 'example2.org', 'example3.org'] \ No newline at end of file diff --git a/tests/test_configfile.py b/tests/test_configfile.py index b6fb342..4b2c1e7 100644 --- a/tests/test_configfile.py +++ b/tests/test_configfile.py @@ -1,15 +1,8 @@ """Test the config file is loading parameters correctly """ +from util import shim_argparse from fediblockhole import setup_argparse, augment_args -def shim_argparse(testargv: list=[], tomldata: str=None): - """Helper function to parse test args - """ - ap = setup_argparse() - args = ap.parse_args(testargv) - args = augment_args(args, tomldata) - return args - def test_parse_tomldata(): tomldata = """ # Test TOML config for FediBlockHole @@ -45,3 +38,14 @@ def test_set_mergeplan_min(): assert args.mergeplan == 
'min' +def test_set_allowlists(): + tomldata = """# Comment on config +allowlist_url_sources = [ { url='file:///path/to/allowlist', format='csv'} ] +""" + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'max' + assert args.allowlist_url_sources == [{ + 'url': 'file:///path/to/allowlist', + 'format': 'csv', + }] diff --git a/tests/test_domainblock.py b/tests/test_domainblock.py index 783fcd8..2db0b51 100644 --- a/tests/test_domainblock.py +++ b/tests/test_domainblock.py @@ -72,12 +72,3 @@ def test_compare_diff_sevs_2(): b = DomainBlock('example1.org', 'noop') assert a != b - -def test_suspend_rejects(): - """A suspend should reject_media and reject_reports - """ - a = DomainBlock('example.org', 'suspend') - - assert a.severity.level == SeverityLevel.SUSPEND - assert a.reject_media == True - assert a.reject_reports == True \ No newline at end of file