From 9817c99e40463e78de55dcb8ff526fbd20fe8d36 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:41:54 +1100 Subject: [PATCH 01/13] Fixed bug in _asdict() of severity level. --- src/fediblockhole/const.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py index 909d84d..e771c4b 100644 --- a/src/fediblockhole/const.py +++ b/src/fediblockhole/const.py @@ -156,7 +156,7 @@ class DomainBlock(object): """ dictval = { 'domain': self.domain, - 'severity': self.severity, + 'severity': str(self.severity), 'public_comment': self.public_comment, 'private_comment': self.private_comment, 'reject_media': self.reject_media, From 894b133fbb956bddeb24c8c6ae6be6af563f4449 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:43:17 +1100 Subject: [PATCH 02/13] str2bool() now converts '' to False. Added some extra debug logging of blocklist parsing. --- src/fediblockhole/blocklist_parser.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fediblockhole/blocklist_parser.py b/src/fediblockhole/blocklist_parser.py index 38b0b59..d5d8394 100644 --- a/src/fediblockhole/blocklist_parser.py +++ b/src/fediblockhole/blocklist_parser.py @@ -97,9 +97,10 @@ class BlocklistParserCSV(BlocklistParser): origitem = blockitem.copy() for key in origitem: if key not in self.import_fields: + log.debug(f"ignoring field '{key}'") del blockitem[key] - # Convert dict to NamedTuple with the double-star operator + # Convert dict to DomainBlock with the double-star operator # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments block = DomainBlock(**blockitem) if block.severity > self.max_severity: @@ -162,7 +163,7 @@ def str2bool(boolstring: str) -> bool: boolstring = boolstring.lower() if boolstring in ['true', 't', '1', 'y', 'yes']: return True - elif boolstring in ['false', 'f', '0', 'n', 'no']: + elif boolstring in ['', 'false', 'f', '0', 'n', 'no']: return False else: raise ValueError(f"Cannot parse value '{boolstring}' as boolean") @@ -183,4 +184,5 @@ def parse_blocklist( """Parse a blocklist in the given format """ parser = FORMAT_PARSERS[format](import_fields, max_severity) + log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...") return parser.parse_blocklist(blockdata) \ No newline at end of file From 7a31c3380ef6aefdba895df678879251df483846 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:44:08 +1100 Subject: [PATCH 03/13] Added support for allowlists. Updated docstring for merge_blocklists() --- src/fediblockhole/__init__.py | 39 +++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index 6cdf143..fdb3a5e 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -35,6 +35,9 @@ API_CALL_DELAY = 5 * 60 / 300 # 300 calls per 5 minutes # We always import the domain and the severity IMPORT_FIELDS = ['domain', 'severity'] +# Allowlists always import these fields +ALLOWLIST_IMPORT_FIELDS = ['domain', 'severity', 'public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate'] + # We always export the domain and the severity EXPORT_FIELDS = ['domain', 'severity'] @@ -69,6 +72,22 @@ def sync_blocklists(conf: dict): # Merge blocklists into an update dict merged = merge_blocklists(blocklists, conf.mergeplan) + + # Apply allows specified on the commandline + for domain in conf.allow_domains: + log.info(f"Allowing domain '{domain}' specified on commandline.") + merged[domain] = DomainBlock(domain, 'noop') + + # Apply allows from URLs lists + if conf.allowlist_url_sources: + log.info("Adding allows from URL lists...") + allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) + for key, alist in allowlists.items(): + log.debug(f"Processing allows from '{key}'...") + for allowed in alist: + merged[allowed.domain] = allowed + log.debug(f"Allowed domain '{allowed.domain}' from allowlist: {allowed}") + if conf.blocklist_savefile: log.info(f"Saving merged blocklist to {conf.blocklist_savefile}") save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields) @@ -140,9 +159,12 @@ def fetch_from_instances(blocklists: dict, sources: dict, def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict: """Merge fetched remote blocklists into a bulk update + @param blocklists: A dict of lists of DomainBlocks, keyed by source. + Each value is a list of DomainBlocks @param mergeplan: An optional method of merging overlapping block definitions 'max' (the default) uses the highest severity block found 'min' uses the lowest severity block found + @param returns: A dict of DomainBlocks keyed by domain """ merged = {} @@ -433,7 +455,7 @@ def update_known_block(token: str, host: str, block: DomainBlock): response = requests.put(url, headers=requests_headers(token), - data=blockdata, + json=blockdata._asdict(), timeout=REQUEST_TIMEOUT ) if response.status_code != 200: @@ -442,14 +464,14 @@ def update_known_block(token: str, host: str, block: DomainBlock): def add_block(token: str, host: str, blockdata: DomainBlock): """Block a domain on Mastodon host """ - log.debug(f"Blocking domain {blockdata.domain} at {host}...") + log.debug(f"Adding block entry for {blockdata.domain} at {host}...") api_path = "/api/v1/admin/domain_blocks" url = f"https://{host}{api_path}" response = requests.post(url, headers=requests_headers(token), - data=blockdata._asdict(), + json=blockdata._asdict(), timeout=REQUEST_TIMEOUT ) if response.status_code == 422: @@ -515,6 +537,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], log.info(f"Pushing new block definition: {newblock}") blockdata = oldblock.copy() blockdata.update(newblock) + log.debug(f"Block as dict: {blockdata._asdict()}") + if not dryrun: update_known_block(token, host, blockdata) # add a pause here so we don't melt the instance @@ -530,6 +554,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], # This is a new block for the target instance, so we # need to add a block rather than update an existing one log.info(f"Adding new block: {newblock}...") + log.debug(f"Block as dict: {newblock._asdict()}") # Make sure the new block doesn't clobber a domain with followers newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity) @@ -615,9 +640,10 @@ def augment_args(args): if not args.import_fields: args.import_fields = conf.get('import_fields', []) - args.blocklist_url_sources = conf.get('blocklist_url_sources') - args.blocklist_instance_sources = conf.get('blocklist_instance_sources') - args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations') + args.blocklist_url_sources = conf.get('blocklist_url_sources', None) + args.blocklist_instance_sources = conf.get('blocklist_instance_sources', None) + args.allowlist_url_sources = conf.get('allowlist_url_sources', None) + args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', None) return args @@ -637,6 +663,7 @@ def main(): ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.") ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.") + ap.add_argument('-A', '--allow', dest="allow_domains", action='append', default=[], help="Override any blocks to allow this domain.") ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.") ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.") From 6d4e18bbf6f25da2512fe3ef902341f8ebddcae5 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 10:59:19 +1100 Subject: [PATCH 04/13] Fixed bug in how DomainBlock defaults handle reject_media, reject_reports. --- src/fediblockhole/const.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py index e771c4b..b67959b 100644 --- a/src/fediblockhole/const.py +++ b/src/fediblockhole/const.py @@ -127,13 +127,15 @@ class DomainBlock(object): """Initialize the DomainBlock """ self.domain = domain + # Set severity first so if reject_media or reject_reports = False + # that overrides the default of True for severity = 'suspend' + self.severity = severity self.public_comment = public_comment self.private_comment = private_comment self.reject_media = reject_media self.reject_reports = reject_reports self.obfuscate = obfuscate self.id = id - self.severity = severity @property def severity(self): @@ -147,6 +149,7 @@ class DomainBlock(object): self._severity = BlockSeverity(sev) # Suspend implies reject_media,reject_reports == True + log.debug('Suspend blocks media and reports') if self._severity.level == SeverityLevel.SUSPEND: self.reject_media = True self.reject_reports = True From 17e0bdab0bb4758dc6e1e52991199fba985e81f4 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 11:00:21 +1100 Subject: [PATCH 05/13] Force allowlist entries to always be severity 'noop'. --- src/fediblockhole/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index fdb3a5e..1daad8a 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -85,6 +85,9 @@ def sync_blocklists(conf: dict): for key, alist in allowlists.items(): log.debug(f"Processing allows from '{key}'...") for allowed in alist: + # Ensure the severity is always 'noop' + # This is to prevent accidentally blocking something you wanted to allow. + allowed.severity = 'noop' merged[allowed.domain] = allowed log.debug(f"Allowed domain '{allowed.domain}' from allowlist: {allowed}") From 26f54648267308e165cfacbbd9a23315fd5d4f3b Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 11:09:38 +1100 Subject: [PATCH 06/13] Added documentation on allowlists. --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d93b537..a4bebde 100644 --- a/README.md +++ b/README.md @@ -159,11 +159,12 @@ Or you can use the default location of `/etc/default/fediblockhole.conf.toml`. As the filename suggests, FediBlockHole uses TOML syntax. -There are 3 key sections: +There are 4 key sections: 1. `blocklist_urls_sources`: A list of URLs to read blocklists from 1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API 1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API + 1. `allowlist_url_sources`: A list of URLs to read allowlists from More detail on configuring the tool is provided below. @@ -286,6 +287,25 @@ mergeplan. Once the follow count drops to 0 on your instance, the tool will automatically use the highest severity it finds again (if you're using the `max` mergeplan). +### Allowlists + +Sometimes you might want to override the blocklist definitions and always allow +certain domains to access your instance. That's what allowlists are for. + +Allowlists can be any in format supported by `blocklist_urls_sources` but will +always set the severity to 'noop'. + +An allowlist can contain just the `domain` field and a set of domains, but can +also contain other fields if you want to add public or private comments, for +example. + +You can also allow domains on the commandline by using the `-A` or `--allow` +flag and providing the domain name to allow. You can use the flag multiple +times to allow multiple domains. + +The allowed domains will be included in the final merged list of domains that +gets exported if you choose to save the mergelist to a file. + ## More advanced configuration For a list of possible configuration options, check the `--help` and read the From 7bc10d6690ea909aa5f90af5455462a8bbf8298b Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sat, 14 Jan 2023 11:09:53 +1100 Subject: [PATCH 07/13] Version bump to v0.4.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 24f9aff..f982ffe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "fediblockhole" -version = "0.4.0" +version = "0.4.1" description = "Federated blocklist management for Mastodon" readme = "README.md" license = {file = "LICENSE"} From 3aa2e378e3d184c3408ee89b0e08c6fb731ff4a5 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 10:02:12 +1100 Subject: [PATCH 08/13] Remove implied setting of reject_media/reports if severity is set to 'suspend'. Complicates understanding of what the code actually does. --- src/fediblockhole/const.py | 8 -------- tests/test_domainblock.py | 9 --------- 2 files changed, 17 deletions(-) diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py index b67959b..93cf2ef 100644 --- a/src/fediblockhole/const.py +++ b/src/fediblockhole/const.py @@ -127,8 +127,6 @@ class DomainBlock(object): """Initialize the DomainBlock """ self.domain = domain - # Set severity first so if reject_media or reject_reports = False - # that overrides the default of True for severity = 'suspend' self.severity = severity self.public_comment = public_comment self.private_comment = private_comment @@ -148,12 +146,6 @@ class DomainBlock(object): else: self._severity = BlockSeverity(sev) - # Suspend implies reject_media,reject_reports == True - log.debug('Suspend blocks media and reports') - if self._severity.level == SeverityLevel.SUSPEND: - self.reject_media = True - self.reject_reports = True - def _asdict(self): """Return a dict version of this object """ diff --git a/tests/test_domainblock.py b/tests/test_domainblock.py index 783fcd8..2db0b51 100644 --- a/tests/test_domainblock.py +++ b/tests/test_domainblock.py @@ -72,12 +72,3 @@ def test_compare_diff_sevs_2(): b = DomainBlock('example1.org', 'noop') assert a != b - -def test_suspend_rejects(): - """A suspend should reject_media and reject_reports - """ - a = DomainBlock('example.org', 'suspend') - - assert a.severity.level == SeverityLevel.SUSPEND - assert a.reject_media == True - assert a.reject_reports == True \ No newline at end of file From a3d3571a20539aae13adcdb9cb863e2a4ad8b1d9 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 10:10:52 +1100 Subject: [PATCH 09/13] Added basic tests of allowlist config args. --- tests/test_cmdline.py | 16 +++++++++++++++- tests/test_configfile.py | 11 +++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index ed63349..4e6f355 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -37,4 +37,18 @@ def test_cmdline_mergeplan_min(): ap = setup_argparse() args = ap.parse_args(['-m', 'min']) - assert args.mergeplan == 'min' \ No newline at end of file + assert args.mergeplan == 'min' + +def test_set_allow_domain(): + """Set a single allow domain on commandline""" + ap = setup_argparse() + args = ap.parse_args(['-A', 'example.org']) + + assert args.allow_domains == ['example.org'] + +def test_set_multiple_allow_domains(): + """Set multiple allow domains on commandline""" + ap = setup_argparse() + args = ap.parse_args(['-A', 'example.org', '-A', 'example2.org', '-A', 'example3.org']) + + assert args.allow_domains == ['example.org', 'example2.org', 'example3.org'] \ No newline at end of file diff --git a/tests/test_configfile.py b/tests/test_configfile.py index b6fb342..d52a425 100644 --- a/tests/test_configfile.py +++ b/tests/test_configfile.py @@ -45,3 +45,14 @@ def test_set_mergeplan_min(): assert args.mergeplan == 'min' +def test_set_allowlists(): + tomldata = """# Comment on config +allowlist_url_sources = [ { url='file:///path/to/allowlist', format='csv'} ] +""" + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'max' + assert args.allowlist_url_sources == [{ + 'url': 'file:///path/to/allowlist', + 'format': 'csv', + }] From bf48a9639ea0390d34181d35e874f444096b91bc Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:32:13 +1100 Subject: [PATCH 10/13] Added helper submodule for testing utils --- setup.cfg | 2 ++ tests/conftest.py | 3 +++ tests/helpers/__init__.py | 0 tests/helpers/util.py | 11 +++++++++++ tests/test_cmdline.py | 9 +-------- tests/test_configfile.py | 9 +-------- 6 files changed, 18 insertions(+), 16 deletions(-) create mode 100644 setup.cfg create mode 100644 tests/conftest.py create mode 100644 tests/helpers/__init__.py create mode 100644 tests/helpers/util.py diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..6d8a5af --- /dev/null +++ b/setup.cfg @@ -0,0 +1,2 @@ +[pytest] +norecursedirs=tests/helpers \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..501ed17 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,3 @@ +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), 'helpers')) \ No newline at end of file diff --git a/tests/helpers/__init__.py b/tests/helpers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/helpers/util.py b/tests/helpers/util.py new file mode 100644 index 0000000..faed6e1 --- /dev/null +++ b/tests/helpers/util.py @@ -0,0 +1,11 @@ +""" Utility functions for tests +""" +from fediblockhole import setup_argparse, augment_args + +def shim_argparse(testargv: list=[], tomldata: str=None): + """Helper function to parse test args + """ + ap = setup_argparse() + args = ap.parse_args(testargv) + args = augment_args(args, tomldata) + return args \ No newline at end of file diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py index 4e6f355..46b5748 100644 --- a/tests/test_cmdline.py +++ b/tests/test_cmdline.py @@ -1,15 +1,8 @@ """Test the commandline defined parameters correctly """ +from util import shim_argparse from fediblockhole import setup_argparse, augment_args -def shim_argparse(testargv: list=[], tomldata: str=None): - """Helper function to parse test args - """ - ap = setup_argparse() - args = ap.parse_args(testargv) - args = augment_args(args, tomldata) - return args - def test_cmdline_no_configfile(): """ Test bare command with no configfile """ diff --git a/tests/test_configfile.py b/tests/test_configfile.py index d52a425..4b2c1e7 100644 --- a/tests/test_configfile.py +++ b/tests/test_configfile.py @@ -1,15 +1,8 @@ """Test the config file is loading parameters correctly """ +from util import shim_argparse from fediblockhole import setup_argparse, augment_args -def shim_argparse(testargv: list=[], tomldata: str=None): - """Helper function to parse test args - """ - ap = setup_argparse() - args = ap.parse_args(testargv) - args = augment_args(args, tomldata) - return args - def test_parse_tomldata(): tomldata = """ # Test TOML config for FediBlockHole From 9bd79145dca845cdb7df1a388eebc0c99be705c0 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:32:45 +1100 Subject: [PATCH 11/13] Edited sample config to better explain URL source --- etc/sample.fediblockhole.conf.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/sample.fediblockhole.conf.toml b/etc/sample.fediblockhole.conf.toml index 637dde2..5190d25 100644 --- a/etc/sample.fediblockhole.conf.toml +++ b/etc/sample.fediblockhole.conf.toml @@ -16,7 +16,7 @@ blocklist_instance_sources = [ # max_severity tells the parser to override any severities that are higher than this value # import_fields tells the parser to only import that set of fields from a specific source blocklist_url_sources = [ - # { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, + # { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' }, { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' }, ] From a25773f838aa73f888eaf24832e12a089a14b159 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:33:32 +1100 Subject: [PATCH 12/13] Allowlists just remove blocks from merged list before push. --- src/fediblockhole/__init__.py | 60 ++++++++++++++++++++++------------- tests/test_allowlist.py | 49 ++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 tests/test_allowlist.py diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index 1b0ea53..945e29c 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -41,7 +41,7 @@ ALLOWLIST_IMPORT_FIELDS = ['domain', 'severity', 'public_comment', 'private_comm # We always export the domain and the severity EXPORT_FIELDS = ['domain', 'severity'] -def sync_blocklists(conf: dict): +def sync_blocklists(conf: argparse.Namespace): """Sync instance blocklists from remote sources. @param conf: A configuration dictionary @@ -73,24 +73,11 @@ def sync_blocklists(conf: dict): # Merge blocklists into an update dict merged = merge_blocklists(blocklists, conf.mergeplan) - # Apply allows specified on the commandline - for domain in conf.allow_domains: - log.info(f"Allowing domain '{domain}' specified on commandline.") - merged[domain] = DomainBlock(domain, 'noop') - - # Apply allows from URLs lists - if conf.allowlist_url_sources: - log.info("Adding allows from URL lists...") - allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) - for key, alist in allowlists.items(): - log.debug(f"Processing allows from '{key}'...") - for allowed in alist: - # Ensure the severity is always 'noop' - # This is to prevent accidentally blocking something you wanted to allow. - allowed.severity = 'noop' - merged[allowed.domain] = allowed - log.debug(f"Allowed domain '{allowed.domain}' from allowlist: {allowed}") + # Remove items listed in allowlists, if any + allowlists = fetch_allowlists(conf) + merged = apply_allowlists(merged, conf, allowlists) + # Save the final mergelist, if requested if conf.blocklist_savefile: log.info(f"Saving merged blocklist to {conf.blocklist_savefile}") save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields) @@ -104,6 +91,35 @@ def sync_blocklists(conf: dict): max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence')) push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity) +def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict): + """Apply allowlists + """ + # Apply allows specified on the commandline + for domain in conf.allow_domains: + log.info(f"'{domain}' allowed by commandline, removing any blocks...") + if domain in merged: + del merged[domain] + + # Apply allows from URLs lists + log.info("Removing domains from URL allowlists...") + for key, alist in allowlists.items(): + log.debug(f"Processing allows from '{key}'...") + for allowed in alist: + domain = allowed.domain + log.debug(f"Removing allowlisted domain '{domain}' from merged list.") + if domain in merged: + del merged[domain] + + return merged + +def fetch_allowlists(conf: argparse.Namespace) -> dict: + """ + """ + if conf.allowlist_url_sources: + allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) + return allowlists + return {} + def fetch_from_urls(blocklists: dict, url_sources: dict, import_fields: list=IMPORT_FIELDS, save_intermediate: bool=False, @@ -655,10 +671,10 @@ def augment_args(args, tomldata: str=None): if not args.mergeplan: args.mergeplan = conf.get('mergeplan', 'max') - args.blocklist_url_sources = conf.get('blocklist_url_sources', None) - args.blocklist_instance_sources = conf.get('blocklist_instance_sources', None) - args.allowlist_url_sources = conf.get('allowlist_url_sources', None) - args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', None) + args.blocklist_url_sources = conf.get('blocklist_url_sources', []) + args.blocklist_instance_sources = conf.get('blocklist_instance_sources', []) + args.allowlist_url_sources = conf.get('allowlist_url_sources', []) + args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', []) return args diff --git a/tests/test_allowlist.py b/tests/test_allowlist.py new file mode 100644 index 0000000..e632361 --- /dev/null +++ b/tests/test_allowlist.py @@ -0,0 +1,49 @@ +""" Test allowlists +""" +import pytest + +from util import shim_argparse +from fediblockhole.const import DomainBlock +from fediblockhole import fetch_allowlists, apply_allowlists + +def test_cmdline_allow_removes_domain(): + """Test that -A removes entries from merged + """ + conf = shim_argparse(['-A', 'removeme.org']) + + merged = { + 'example.org': DomainBlock('example.org'), + 'example2.org': DomainBlock('example.org'), + 'removeme.org': DomainBlock('removeme.org'), + 'keepblockingme.org': DomainBlock('keepblockingme.org'), + } + + # allowlists = { + # 'testlist': [ DomainBlock('removeme.org', 'noop'), ] + # } + + merged = apply_allowlists(merged, conf, {}) + + with pytest.raises(KeyError): + merged['removeme.org'] + +def test_allowlist_removes_domain(): + """Test that an item in an allowlist removes entries from merged + """ + conf = shim_argparse() + + merged = { + 'example.org': DomainBlock('example.org'), + 'example2.org': DomainBlock('example.org'), + 'removeme.org': DomainBlock('removeme.org'), + 'keepblockingme.org': DomainBlock('keepblockingme.org'), + } + + allowlists = { + 'testlist': [ DomainBlock('removeme.org', 'noop'), ] + } + + merged = apply_allowlists(merged, conf, allowlists) + + with pytest.raises(KeyError): + merged['removeme.org'] From dc4bbd740ba5f7f2ef75c129904ba0ffa3239256 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 15 Jan 2023 13:38:07 +1100 Subject: [PATCH 13/13] Updated README to explain allowlist mechanism. --- README.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a4bebde..4e28482 100644 --- a/README.md +++ b/README.md @@ -289,22 +289,21 @@ use the highest severity it finds again (if you're using the `max` mergeplan). ### Allowlists -Sometimes you might want to override the blocklist definitions and always allow -certain domains to access your instance. That's what allowlists are for. +Sometimes you might want to completely ignore the blocklist definitions for +certain domains. That's what allowlists are for. -Allowlists can be any in format supported by `blocklist_urls_sources` but will -always set the severity to 'noop'. +Allowlists remove any domain in the list from the merged list of blocks before +the merged list is saved out to a file or pushed to any instance. -An allowlist can contain just the `domain` field and a set of domains, but can -also contain other fields if you want to add public or private comments, for -example. +Allowlists can be in any format supported by `blocklist_urls_sources` but ignore +all fields that aren't `domain`. You can also allow domains on the commandline by using the `-A` or `--allow` flag and providing the domain name to allow. You can use the flag multiple times to allow multiple domains. -The allowed domains will be included in the final merged list of domains that -gets exported if you choose to save the mergelist to a file. +It is probably wise to include your own instance domain in an allowlist so you +don't accidentally defederate from yourself. ## More advanced configuration