From 9817c99e40463e78de55dcb8ff526fbd20fe8d36 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:41:54 +1100 Subject: [PATCH 01/15] Fixed bug in _asdict() of severity level. --- src/fediblockhole/const.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py index 909d84d..e771c4b 100644 --- a/src/fediblockhole/const.py +++ b/src/fediblockhole/const.py @@ -156,7 +156,7 @@ class DomainBlock(object): """ dictval = { 'domain': self.domain, - 'severity': self.severity, + 'severity': str(self.severity), 'public_comment': self.public_comment, 'private_comment': self.private_comment, 'reject_media': self.reject_media, From 894b133fbb956bddeb24c8c6ae6be6af563f4449 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:43:17 +1100 Subject: [PATCH 02/15] str2bool() now converts '' to False. Added some extra debug logging of blocklist parsing. --- src/fediblockhole/blocklist_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fediblockhole/blocklist_parser.py b/src/fediblockhole/blocklist_parser.py index 38b0b59..d5d8394 100644 --- a/src/fediblockhole/blocklist_parser.py +++ b/src/fediblockhole/blocklist_parser.py @@ -97,9 +97,10 @@ class BlocklistParserCSV(BlocklistParser): origitem = blockitem.copy() for key in origitem: if key not in self.import_fields: + log.debug(f"ignoring field '{key}'") del blockitem[key] - # Convert dict to NamedTuple with the double-star operator + # Convert dict to DomainBlock with the double-star operator # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments block = DomainBlock(**blockitem) if block.severity > self.max_severity: @@ -162,7 +163,7 @@ def str2bool(boolstring: str) -> bool: boolstring = boolstring.lower() if boolstring in ['true', 't', '1', 'y', 'yes']: return True - elif boolstring in ['false', 'f', '0', 'n', 'no']: + elif boolstring in ['', 'false', 'f', '0', 'n', 'no']: return False else: raise ValueError(f"Cannot parse value '{boolstring}' as boolean") @@ -183,4 +184,5 @@ def parse_blocklist( """Parse a blocklist in the given format """ parser = FORMAT_PARSERS[format](import_fields, max_severity) + log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...") return parser.parse_blocklist(blockdata) \ No newline at end of file From 7a31c3380ef6aefdba895df678879251df483846 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:44:08 +1100 Subject: [PATCH 03/15] Added support for allowlists. Updated docstring for merge_blocklists() --- src/fediblockhole/__init__.py | 39 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index 6cdf143..fdb3a5e 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -35,6 +35,9 @@ API_CALL_DELAY = 5 * 60 / 300 # 300 calls per 5 minutes # We always import the domain and the severity IMPORT_FIELDS = ['domain', 'severity'] +# Allowlists always import these fields +ALLOWLIST_IMPORT_FIELDS = ['domain', 'severity', 'public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate'] + # We always export the domain and the severity EXPORT_FIELDS = ['domain', 'severity'] @@ -69,6 +72,22 @@ def sync_blocklists(conf: dict): # Merge blocklists into an update dict merged = merge_blocklists(blocklists, conf.mergeplan) + + # Apply allows specified on the commandline + for domain in conf.allow_domains: + log.info(f"Allowing domain '{domain}' specified on commandline.") + merged[domain] = DomainBlock(domain, 'noop') + + # Apply allows from URLs lists + if conf.allowlist_url_sources: + log.info("Adding allows from URL lists...") + allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) + for key, alist in allowlists.items(): + log.debug(f"Processing allows from '{key}'...") + for allowed in alist: + merged[allowed.domain] = allowed + log.debug(f"Allowed domain '{allowed.domain}' from allowlist: {allowed}") + if conf.blocklist_savefile: log.info(f"Saving merged blocklist to {conf.blocklist_savefile}") save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields) @@ -140,9 +159,12 @@ def fetch_from_instances(blocklists: dict, sources: dict, def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict: """Merge fetched remote blocklists into a bulk update + @param blocklists: A dict of lists of DomainBlocks, keyed by source. + Each value is a list of DomainBlocks @param mergeplan: An optional method of merging overlapping block definitions 'max' (the default) uses the highest severity block found 'min' uses the lowest severity block found + @param returns: A dict of DomainBlocks keyed by domain """ merged = {} @@ -433,7 +455,7 @@ def update_known_block(token: str, host: str, block: DomainBlock): response = requests.put(url, headers=requests_headers(token), - data=blockdata, + json=blockdata._asdict(), timeout=REQUEST_TIMEOUT ) if response.status_code != 200: @@ -442,14 +464,14 @@ def update_known_block(token: str, host: str, block: DomainBlock): def add_block(token: str, host: str, blockdata: DomainBlock): """Block a domain on Mastodon host """ - log.debug(f"Blocking domain {blockdata.domain} at {host}...") + log.debug(f"Adding block entry for {blockdata.domain} at {host}...") api_path = "/api/v1/admin/domain_blocks" url = f"https://{host}{api_path}" response = requests.post(url, headers=requests_headers(token), - data=blockdata._asdict(), + json=blockdata._asdict(), timeout=REQUEST_TIMEOUT ) if response.status_code == 422: @@ -515,6 +537,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], log.info(f"Pushing new block definition: {newblock}") blockdata = oldblock.copy() blockdata.update(newblock) + log.debug(f"Block as dict: {blockdata._asdict()}") + if not dryrun: update_known_block(token, host, blockdata) # add a pause here so we don't melt the instance @@ -530,6 +554,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], # This is a new block for the target instance, so we # need to add a block rather than update an existing one log.info(f"Adding new block: {newblock}...") + log.debug(f"Block as dict: {newblock._asdict()}") # Make sure the new block doesn't clobber a domain with followers newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity) @@ -615,9 +640,10 @@ def augment_args(args): if not args.import_fields: args.import_fields = conf.get('import_fields', []) - args.blocklist_url_sources = conf.get('blocklist_url_sources') - args.blocklist_instance_sources = conf.get('blocklist_instance_sources') - args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations') + args.blocklist_url_sources = conf.get('blocklist_url_sources', None) + args.blocklist_instance_sources = conf.get('blocklist_instance_sources', None) + args.allowlist_url_sources = conf.get('allowlist_url_sources', None) + args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', None) return args @@ -637,6 +663,7 @@ def main(): ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.") ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.") + ap.add_argument('-A', '--allow', dest="allow_domains", action='append', default=[], help="Override any blocks to allow this domain.") ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.") ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.") From 6d4e18bbf6f25da2512fe3ef902341f8ebddcae5 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:59:19 +1100 Subject: [PATCH 04/15] Fixed bug in how DomainBlock defaults handle reject_media, reject_reports. --- src/fediblockhole/const.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py index e771c4b..b67959b 100644 --- a/src/fediblockhole/const.py +++ b/src/fediblockhole/const.py @@ -127,13 +127,15 @@ class DomainBlock(object): """Initialize the DomainBlock """ self.domain = domain + # Set severity first so if reject_media or reject_reports = False + # that overrides the default of True for severity = 'suspend' + self.severity = severity self.public_comment = public_comment self.private_comment = private_comment self.reject_media = reject_media self.reject_reports = reject_reports self.obfuscate = obfuscate self.id = id - self.severity = severity @property def severity(self): @@ -147,6 +149,7 @@ class DomainBlock(object): self._severity = BlockSeverity(sev) # Suspend implies reject_media,reject_reports == True + log.debug('Suspend blocks media and reports') if self._severity.level == SeverityLevel.SUSPEND: self.reject_media = True self.reject_reports = True From 17e0bdab0bb4758dc6e1e52991199fba985e81f4 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 11:00:21 +1100 Subject: [PATCH 05/15] Force allowlist entries to always be severity 'noop'. --- src/fediblockhole/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index fdb3a5e..1daad8a 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -85,6 +85,9 @@ def sync_blocklists(conf: dict): for key, alist in allowlists.items(): log.debug(f"Processing allows from '{key}'...") for allowed in alist: + # Ensure the severity is always 'noop' + # This is to prevent accidentally blocking something you wanted to allow. + allowed.severity = 'noop' merged[allowed.domain] = allowed log.debug(f"Allowed domain '{allowed.domain}' from allowlist: {allowed}") From 26f54648267308e165cfacbbd9a23315fd5d4f3b Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 11:09:38 +1100 Subject: [PATCH 06/15] Added documentation on allowlists. --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d93b537..a4bebde 100644 --- a/README.md +++ b/README.md @@ -159,11 +159,12 @@ Or you can use the default location of `/etc/default/fediblockhole.conf.toml`. As the filename suggests, FediBlockHole uses TOML syntax. -There are 3 key sections: +There are 4 key sections: 1. `blocklist_urls_sources`: A list of URLs to read blocklists from 1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API 1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API + 1. `allowlist_url_sources`: A list of URLs to read allowlists from More detail on configuring the tool is provided below. @@ -286,6 +287,25 @@ mergeplan. Once the follow count drops to 0 on your instance, the tool will automatically use the highest severity it finds again (if you're using the `max` mergeplan). +### Allowlists + +Sometimes you might want to override the blocklist definitions and always allow +certain domains to access your instance. That's what allowlists are for. + +Allowlists can be any in format supported by `blocklist_urls_sources` but will +always set the severity to 'noop'. + +An allowlist can contain just the `domain` field and a set of domains, but can +also contain other fields if you want to add public or private comments, for +example. + +You can also allow domains on the commandline by using the `-A` or `--allow` +flag and providing the domain name to allow. You can use the flag multiple +times to allow multiple domains. + +The allowed domains will be included in the final merged list of domains that +gets exported if you choose to save the mergelist to a file. + ## More advanced configuration For a list of possible configuration options, check the `--help` and read the From 7bc10d6690ea909aa5f90af5455462a8bbf8298b Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 11:09:53 +1100 Subject: [PATCH 07/15] Version bump to v0.4.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 24f9aff..f982ffe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fediblockhole" -version = "0.4.0" +version = "0.4.1" description = "Federated blocklist management for Mastodon" readme = "README.md" license = {file = "LICENSE"} From 11accf33d31d3158de6d9a258d79d67000507fbc Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 09:46:37 +1100 Subject: [PATCH 08/15] Restructured argparsing for easier testing. Fixed bug: mergeplan in config file was ignored. Reported in #22 Added test cases for cmdline parsing. Added test cases for configfile parsing. --- src/fediblockhole/__init__.py | 41 ++++++++++++++++++++++-------- tests/test_cmdline.py | 40 +++++++++++++++++++++++++++++ tests/test_configfile.py | 47 +++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 tests/test_cmdline.py create mode 100644 tests/test_configfile.py diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index 6cdf143..e4197c7 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -126,6 +126,8 @@ def fetch_from_instances(blocklists: dict, sources: dict, domain = item['domain'] admin = item.get('admin', False) token = item.get('token', None) + itemsrc = f"https://{domain}/api" + # If import fields are provided, they override the global ones passed in source_import_fields = item.get('import_fields', None) if source_import_fields: @@ -133,9 +135,9 @@ def fetch_from_instances(blocklists: dict, sources: dict, import_fields = IMPORT_FIELDS.extend(source_import_fields) # Add the blocklist with the domain as the source key - blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields) + blocklists[itemsrc] = fetch_instance_blocklist(domain, token, admin, import_fields) if save_intermediate: - save_intermediate_blocklist(blocklists[domain], domain, savedir, export_fields) + save_intermediate_blocklist(blocklists[itemsrc], domain, savedir, export_fields) return blocklists def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict: @@ -587,9 +589,16 @@ def save_blocklist_to_file( for item in blocklist: writer.writerow(item._asdict()) -def augment_args(args): - """Augment commandline arguments with config file parameters""" - conf = toml.load(args.config) +def augment_args(args, tomldata: str=None): + """Augment commandline arguments with config file parameters + + If tomldata is provided, uses that data instead of loading + from a config file. + """ + if tomldata: + conf = toml.loads(tomldata) + else: + conf = toml.load(args.config) if not args.no_fetch_url: args.no_fetch_url = conf.get('no_fetch_url', False) @@ -615,14 +624,18 @@ def augment_args(args): if not args.import_fields: args.import_fields = conf.get('import_fields', []) - args.blocklist_url_sources = conf.get('blocklist_url_sources') - args.blocklist_instance_sources = conf.get('blocklist_instance_sources') - args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations') + if not args.mergeplan: + args.mergeplan = conf.get('mergeplan', 'max') + + args.blocklist_url_sources = conf.get('blocklist_url_sources', []) + args.blocklist_instance_sources = conf.get('blocklist_instance_sources', []) + args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', []) return args -def main(): - +def setup_argparse(): + """Setup the commandline arguments + """ ap = argparse.ArgumentParser( description="Bulk blocklist tool", epilog=f"Part of FediBlockHole v{__version__}", @@ -633,7 +646,7 @@ def main(): ap.add_argument('-o', '--outfile', dest="blocklist_savefile", help="Save merged blocklist to a local file.") ap.add_argument('-S', '--save-intermediate', dest="save_intermediate", action='store_true', help="Save intermediate blocklists we fetch to local files.") ap.add_argument('-D', '--savedir', dest="savedir", help="Directory path to save intermediate lists.") - ap.add_argument('-m', '--mergeplan', choices=['min', 'max'], default='max', help="Set mergeplan.") + ap.add_argument('-m', '--mergeplan', choices=['min', 'max'], help="Set mergeplan.") ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.") ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.") @@ -645,7 +658,13 @@ def main(): ap.add_argument('--loglevel', choices=['debug', 'info', 'warning', 'error', 'critical'], help="Set log output level.") ap.add_argument('--dryrun', action='store_true', help="Don't actually push updates, just show what would happen.") + return ap + +def main(): + + ap = setup_argparse() args = ap.parse_args() + if args.loglevel is not None: levelname = args.loglevel.upper() log.setLevel(getattr(logging, levelname)) diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py new file mode 100644 index 0000000..ed63349 --- /dev/null +++ b/tests/test_cmdline.py @@ -0,0 +1,40 @@ +"""Test the commandline defined parameters correctly +""" +from fediblockhole import setup_argparse, augment_args + +def shim_argparse(testargv: list=[], tomldata: str=None): + """Helper function to parse test args + """ + ap = setup_argparse() + args = ap.parse_args(testargv) + args = augment_args(args, tomldata) + return args + +def test_cmdline_no_configfile(): + """ Test bare command with no configfile + """ + ap = setup_argparse() + args = ap.parse_args([]) + + assert args.config == '/etc/default/fediblockhole.conf.toml' + assert args.mergeplan == None + assert args.blocklist_savefile == None + assert args.save_intermediate == False + assert args.savedir == None + assert args.import_fields == None + assert args.export_fields == None + + assert args.no_fetch_url == False + assert args.no_fetch_instance == False + assert args.no_push_instance == False + assert args.dryrun == False + + assert args.loglevel == None + +def test_cmdline_mergeplan_min(): + """ Test setting mergeplan min + """ + ap = setup_argparse() + args = ap.parse_args(['-m', 'min']) + + assert args.mergeplan == 'min' \ No newline at end of file diff --git a/tests/test_configfile.py b/tests/test_configfile.py new file mode 100644 index 0000000..b6fb342 --- /dev/null +++ b/tests/test_configfile.py @@ -0,0 +1,47 @@ +"""Test the config file is loading parameters correctly +""" +from fediblockhole import setup_argparse, augment_args + +def shim_argparse(testargv: list=[], tomldata: str=None): + """Helper function to parse test args + """ + ap = setup_argparse() + args = ap.parse_args(testargv) + args = augment_args(args, tomldata) + return args + +def test_parse_tomldata(): + tomldata = """ +# Test TOML config for FediBlockHole + +blocklist_instance_sources = [] + +blocklist_url_sources = [] + +save_intermediate = true + +import_fields = ['public_comment'] +""" + ap = setup_argparse() + args = ap.parse_args([]) + args = augment_args(args, tomldata) + + assert args.blocklist_instance_sources == [] + assert args.blocklist_url_sources == [] + assert args.save_intermediate == True + assert args.import_fields == ['public_comment'] + +def test_set_mergeplan_max(): + tomldata = """mergeplan = 'max' + """ + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'max' + +def test_set_mergeplan_min(): + tomldata = """mergeplan = 'min' + """ + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'min' + From 3aa2e378e3d184c3408ee89b0e08c6fb731ff4a5 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 10:02:12 +1100 Subject: [PATCH 09/15] Remove implied setting of reject_media/reports if severity is set to 'suspend'. Complicates understanding of what the code actually does. --- src/fediblockhole/const.py | 8 -------- tests/test_domainblock.py | 9 --------- 2 files changed, 17 deletions(-) diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py index b67959b..93cf2ef 100644 --- a/src/fediblockhole/const.py +++ b/src/fediblockhole/const.py @@ -127,8 +127,6 @@ class DomainBlock(object): """Initialize the DomainBlock """ self.domain = domain - # Set severity first so if reject_media or reject_reports = False - # that overrides the default of True for severity = 'suspend' self.severity = severity self.public_comment = public_comment self.private_comment = private_comment @@ -148,12 +146,6 @@ class DomainBlock(object): else: self._severity = BlockSeverity(sev) - # Suspend implies reject_media,reject_reports == True - log.debug('Suspend blocks media and reports') - if self._severity.level == SeverityLevel.SUSPEND: - self.reject_media = True - self.reject_reports = True - def _asdict(self): """Return a dict version of this object """ diff --git a/tests/test_domainblock.py b/tests/test_domainblock.py index 783fcd8..2db0b51 100644 --- a/tests/test_domainblock.py +++ b/tests/test_domainblock.py @@ -72,12 +72,3 @@ def test_compare_diff_sevs_2(): b = DomainBlock('example1.org', 'noop') assert a != b - -def test_suspend_rejects(): - """A suspend should reject_media and reject_reports - """ - a = DomainBlock('example.org', 'suspend') - - assert a.severity.level == SeverityLevel.SUSPEND - assert a.reject_media == True - assert a.reject_reports == True \ No newline at end of file From a3d3571a20539aae13adcdb9cb863e2a4ad8b1d9 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 10:10:52 +1100 Subject: [PATCH 10/15] Added basic tests of allowlist config args. --- tests/test_cmdline.py | 16 +++++++++++++++- tests/test_configfile.py | 11 +++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index ed63349..4e6f355 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -37,4 +37,18 @@ def test_cmdline_mergeplan_min(): ap = setup_argparse() args = ap.parse_args(['-m', 'min']) - assert args.mergeplan == 'min' \ No newline at end of file + assert args.mergeplan == 'min' + +def test_set_allow_domain(): + """Set a single allow domain on commandline""" + ap = setup_argparse() + args = ap.parse_args(['-A', 'example.org']) + + assert args.allow_domains == ['example.org'] + +def test_set_multiple_allow_domains(): + """Set multiple allow domains on commandline""" + ap = setup_argparse() + args = ap.parse_args(['-A', 'example.org', '-A', 'example2.org', '-A', 'example3.org']) + + assert args.allow_domains == ['example.org', 'example2.org', 'example3.org'] \ No newline at end of file diff --git a/tests/test_configfile.py b/tests/test_configfile.py index b6fb342..d52a425 100644 --- a/tests/test_configfile.py +++ b/tests/test_configfile.py @@ -45,3 +45,14 @@ def test_set_mergeplan_min(): assert args.mergeplan == 'min' +def test_set_allowlists(): + tomldata = """# Comment on config +allowlist_url_sources = [ { url='file:///path/to/allowlist', format='csv'} ] +""" + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'max' + assert args.allowlist_url_sources == [{ + 'url': 'file:///path/to/allowlist', + 'format': 'csv', + }] From bf48a9639ea0390d34181d35e874f444096b91bc Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:32:13 +1100 Subject: [PATCH 11/15] Added helper submodule for testing utils --- setup.cfg | 2 ++ tests/conftest.py | 3 +++ tests/helpers/__init__.py | 0 tests/helpers/util.py | 11 +++++++++++ tests/test_cmdline.py | 9 +-------- tests/test_configfile.py | 9 +-------- 6 files changed, 18 insertions(+), 16 deletions(-) create mode 100644 setup.cfg create mode 100644 tests/conftest.py create mode 100644 tests/helpers/__init__.py create mode 100644 tests/helpers/util.py diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6d8a5af --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[pytest] +norecursedirs=tests/helpers \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..501ed17 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), 'helpers')) \ No newline at end of file diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/helpers/util.py b/tests/helpers/util.py new file mode 100644 index 0000000..faed6e1 --- /dev/null +++ b/tests/helpers/util.py @@ -0,0 +1,11 @@ +""" Utility functions for tests +""" +from fediblockhole import setup_argparse, augment_args + +def shim_argparse(testargv: list=[], tomldata: str=None): + """Helper function to parse test args + """ + ap = setup_argparse() + args = ap.parse_args(testargv) + args = augment_args(args, tomldata) + return args \ No newline at end of file diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index 4e6f355..46b5748 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -1,15 +1,8 @@ """Test the commandline defined parameters correctly """ +from util import shim_argparse from fediblockhole import setup_argparse, augment_args -def shim_argparse(testargv: list=[], tomldata: str=None): - """Helper function to parse test args - """ - ap = setup_argparse() - args = ap.parse_args(testargv) - args = augment_args(args, tomldata) - return args - def test_cmdline_no_configfile(): """ Test bare command with no configfile """ diff --git a/tests/test_configfile.py b/tests/test_configfile.py index d52a425..4b2c1e7 100644 --- a/tests/test_configfile.py +++ b/tests/test_configfile.py @@ -1,15 +1,8 @@ """Test the config file is loading parameters correctly """ +from util import shim_argparse from fediblockhole import setup_argparse, augment_args -def shim_argparse(testargv: list=[], tomldata: str=None): - """Helper function to parse test args - """ - ap = setup_argparse() - args = ap.parse_args(testargv) - args = augment_args(args, tomldata) - return args - def test_parse_tomldata(): tomldata = """ # Test TOML config for FediBlockHole From 9bd79145dca845cdb7df1a388eebc0c99be705c0 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:32:45 +1100 Subject: [PATCH 12/15] Edited sample config to better explain URL source --- etc/sample.fediblockhole.conf.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/sample.fediblockhole.conf.toml b/etc/sample.fediblockhole.conf.toml index 637dde2..5190d25 100644 --- a/etc/sample.fediblockhole.conf.toml +++ b/etc/sample.fediblockhole.conf.toml @@ -16,7 +16,7 @@ blocklist_instance_sources = [ # max_severity tells the parser to override any severities that are higher than this value # import_fields tells the parser to only import that set of fields from a specific source blocklist_url_sources = [ - # { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, + # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' }, ] From a25773f838aa73f888eaf24832e12a089a14b159 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:33:32 +1100 Subject: [PATCH 13/15] Allowlists just remove blocks from merged list before push. --- src/fediblockhole/__init__.py | 60 ++++++++++++++++++++++------------- tests/test_allowlist.py | 49 ++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 tests/test_allowlist.py diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index 1b0ea53..945e29c 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -41,7 +41,7 @@ ALLOWLIST_IMPORT_FIELDS = ['domain', 'severity', 'public_comment', 'private_comm # We always export the domain and the severity EXPORT_FIELDS = ['domain', 'severity'] -def sync_blocklists(conf: dict): +def sync_blocklists(conf: argparse.Namespace): """Sync instance blocklists from remote sources. @param conf: A configuration dictionary @@ -73,24 +73,11 @@ def sync_blocklists(conf: dict): # Merge blocklists into an update dict merged = merge_blocklists(blocklists, conf.mergeplan) - # Apply allows specified on the commandline - for domain in conf.allow_domains: - log.info(f"Allowing domain '{domain}' specified on commandline.") - merged[domain] = DomainBlock(domain, 'noop') - - # Apply allows from URLs lists - if conf.allowlist_url_sources: - log.info("Adding allows from URL lists...") - allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) - for key, alist in allowlists.items(): - log.debug(f"Processing allows from '{key}'...") - for allowed in alist: - # Ensure the severity is always 'noop' - # This is to prevent accidentally blocking something you wanted to allow. - allowed.severity = 'noop' - merged[allowed.domain] = allowed - log.debug(f"Allowed domain '{allowed.domain}' from allowlist: {allowed}") + # Remove items listed in allowlists, if any + allowlists = fetch_allowlists(conf) + merged = apply_allowlists(merged, conf, allowlists) + # Save the final mergelist, if requested if conf.blocklist_savefile: log.info(f"Saving merged blocklist to {conf.blocklist_savefile}") save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields) @@ -104,6 +91,35 @@ def sync_blocklists(conf: dict): max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence')) push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity) +def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict): + """Apply allowlists + """ + # Apply allows specified on the commandline + for domain in conf.allow_domains: + log.info(f"'{domain}' allowed by commandline, removing any blocks...") + if domain in merged: + del merged[domain] + + # Apply allows from URLs lists + log.info("Removing domains from URL allowlists...") + for key, alist in allowlists.items(): + log.debug(f"Processing allows from '{key}'...") + for allowed in alist: + domain = allowed.domain + log.debug(f"Removing allowlisted domain '{domain}' from merged list.") + if domain in merged: + del merged[domain] + + return merged + +def fetch_allowlists(conf: argparse.Namespace) -> dict: + """ + """ + if conf.allowlist_url_sources: + allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) + return allowlists + return {} + def fetch_from_urls(blocklists: dict, url_sources: dict, import_fields: list=IMPORT_FIELDS, save_intermediate: bool=False, @@ -655,10 +671,10 @@ def augment_args(args, tomldata: str=None): if not args.mergeplan: args.mergeplan = conf.get('mergeplan', 'max') - args.blocklist_url_sources = conf.get('blocklist_url_sources', None) - args.blocklist_instance_sources = conf.get('blocklist_instance_sources', None) - args.allowlist_url_sources = conf.get('allowlist_url_sources', None) - args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', None) + args.blocklist_url_sources = conf.get('blocklist_url_sources', []) + args.blocklist_instance_sources = conf.get('blocklist_instance_sources', []) + args.allowlist_url_sources = conf.get('allowlist_url_sources', []) + args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', []) return args diff --git a/tests/test_allowlist.py b/tests/test_allowlist.py new file mode 100644 index 0000000..e632361 --- /dev/null +++ b/tests/test_allowlist.py @@ -0,0 +1,49 @@ +""" Test allowlists +""" +import pytest + +from util import shim_argparse +from fediblockhole.const import DomainBlock +from fediblockhole import fetch_allowlists, apply_allowlists + +def test_cmdline_allow_removes_domain(): + """Test that -A removes entries from merged + """ + conf = shim_argparse(['-A', 'removeme.org']) + + merged = { + 'example.org': DomainBlock('example.org'), + 'example2.org': DomainBlock('example.org'), + 'removeme.org': DomainBlock('removeme.org'), + 'keepblockingme.org': DomainBlock('keepblockingme.org'), + } + + # allowlists = { + # 'testlist': [ DomainBlock('removeme.org', 'noop'), ] + # } + + merged = apply_allowlists(merged, conf, {}) + + with pytest.raises(KeyError): + merged['removeme.org'] + +def test_allowlist_removes_domain(): + """Test that an item in an allowlist removes entries from merged + """ + conf = shim_argparse() + + merged = { + 'example.org': DomainBlock('example.org'), + 'example2.org': DomainBlock('example.org'), + 'removeme.org': DomainBlock('removeme.org'), + 'keepblockingme.org': DomainBlock('keepblockingme.org'), + } + + allowlists = { + 'testlist': [ DomainBlock('removeme.org', 'noop'), ] + } + + merged = apply_allowlists(merged, conf, allowlists) + + with pytest.raises(KeyError): + merged['removeme.org'] From dc4bbd740ba5f7f2ef75c129904ba0ffa3239256 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:38:07 +1100 Subject: [PATCH 14/15] Updated README to explain allowlist mechanism. --- README.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a4bebde..4e28482 100644 --- a/README.md +++ b/README.md @@ -289,22 +289,21 @@ use the highest severity it finds again (if you're using the `max` mergeplan). ### Allowlists -Sometimes you might want to override the blocklist definitions and always allow -certain domains to access your instance. That's what allowlists are for. +Sometimes you might want to completely ignore the blocklist definitions for +certain domains. That's what allowlists are for. -Allowlists can be any in format supported by `blocklist_urls_sources` but will -always set the severity to 'noop'. +Allowlists remove any domain in the list from the merged list of blocks before +the merged list is saved out to a file or pushed to any instance. -An allowlist can contain just the `domain` field and a set of domains, but can -also contain other fields if you want to add public or private comments, for -example. +Allowlists can be in any format supported by `blocklist_urls_sources` but ignore +all fields that aren't `domain`. You can also allow domains on the commandline by using the `-A` or `--allow` flag and providing the domain name to allow. You can use the flag multiple times to allow multiple domains. -The allowed domains will be included in the final merged list of domains that -gets exported if you choose to save the mergelist to a file. +It is probably wise to include your own instance domain in an allowlist so you +don't accidentally defederate from yourself. ## More advanced configuration From 550916dc9b8d9715fbcd7188c4359970a9bb9eaa Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Mon, 16 Jan 2023 07:29:19 +1100 Subject: [PATCH 15/15] Fix incorrect SQL instructions for scope setting. See: #20 --- README.md | 54 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 4e28482..44a9864 100644 --- a/README.md +++ b/README.md @@ -81,20 +81,28 @@ token. The application needs the `admin:read:domain_blocks` OAuth scope, but unfortunately this scope isn't available in the current application screen -(v4.0.2 of Mastodon at time of writing). There is a way to do it with scopes, -but it's really dangerous, so I'm not going to tell you what it is here. +(v4.0.2 of Mastodon at time of writing, but this has been fixed in the main +branch). -A better way is to ask the instance admin to connect to the PostgreSQL database -and add the scope there, like this: +You can allow full `admin:read` access, but be aware that this authorizes +someone to read all the data in the instance. That's asking a lot of a remote +instance admin who just wants to share domain_blocks with you. + +For now, you can ask the instance admin to update the scope in the database +directly like this: ``` -UPDATE oauth_access_tokens - SET scopes='admin:read:domain_blocks' - WHERE token=''; +UPDATE oauth_applications as app + SET scopes = 'admin:read:domain_blocks' + FROM oauth_access_tokens as tok + WHERE app.id = tok.application_id + AND app.name = '' +; ``` -When that's done, FediBlockHole should be able to use its token to read domain -blocks via the API. +When that's done, regenerate the token (so it has the new scopes) in the +application screen in the instance GUI. FediBlockHole should then able to use +the app token to read domain blocks via the API, but nothing else. Alternately, you could ask the remote instance admin to set up FediBlockHole and use it to dump out a CSV blocklist from their instance and then put it somewhere @@ -104,12 +112,17 @@ as explained below. ### Writing instance blocklists To write domain blocks into an instance requires both the `admin:read` and -`admin:write:domain_blocks` OAuth scopes. The `read` scope is used to read the -current list of domain blocks so we update ones that already exist, rather than -trying to add all new ones and clutter up the instance. It's also used to check -if the instance has any accounts that follow accounts on a domain that is about -to get `suspend`ed and automatically drop the block severity to `silence` level -so people have time to migrate accounts before a full defederation takes effect. +`admin:write:domain_blocks` OAuth scopes. + +The tool needs `admin:read:domain_blocks` scope to read the current list of +domain blocks so we update ones that already exist, rather than trying to add +all new ones and clutter up the instance. + +`admin:read` access is needed to check if the instance has any accounts that +follow accounts on a domain that is about to get `suspend`ed and automatically +drop the block severity to `silence` level so people have time to migrate +accounts before a full defederation takes effect. Unfortunately, the statistics +measure used to learn this information requires `admin:read` scope. You can add `admin:read` scope in the application admin screen. Please be aware that this grants full read access to all information in the instance to the @@ -122,12 +135,15 @@ chmod o-r You can also grant full `admin:write` scope to the application, but if you'd prefer to keep things more tightly secured you'll need to use SQL to set the -scopes in the database: +scopes in the database and then regenerate the token: ``` -UPDATE oauth_access_tokens - SET scopes='admin:read admin:write:domain_blocks' - WHERE token=''; +UPDATE oauth_applications as app + SET scopes = 'admin:read admin:write:domain_blocks' + FROM oauth_access_tokens as tok + WHERE app.id = tok.application_id + AND app.name = '' +; ``` When that's done, FediBlockHole should be able to use its token to authorise