Merge pull request #24 from eigenmagic/allowlists

Add allowlists
Justin Warren 2023-01-15 17:22:18 +11:00 committed by GitHub
commit 12cbee0e0c
14 changed files with 171 additions and 42 deletions


@ -159,11 +159,12 @@ Or you can use the default location of `/etc/default/fediblockhole.conf.toml`.
As the filename suggests, FediBlockHole uses TOML syntax.
There are 3 key sections:
There are 4 key sections:
1. `blocklist_url_sources`: A list of URLs to read blocklists from
1. `blocklist_instance_sources`: A list of Mastodon instances to read blocklists from via API
1. `blocklist_instance_destinations`: A list of Mastodon instances to write blocklists to via API
1. `allowlist_url_sources`: A list of URLs to read allowlists from
More detail on configuring the tool is provided below.
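A rough sketch of how these four sections fit together in the TOML file is shown
below. The URLs, domain names and token are placeholders, and the keys inside the
instance entries (`domain`, `token`) are illustrative; see the sample configuration
file for the authoritative format.

```toml
# Read blocklists from these URLs
blocklist_url_sources = [
    { url = 'https://example.org/exported-blocklist.csv', format = 'csv' },
]

# Read blocklists from these instances via the API (entry keys are illustrative)
blocklist_instance_sources = [
    { domain = 'other-instance.example' },
]

# Push the merged blocklist to these instances via the API (entry keys are illustrative)
blocklist_instance_destinations = [
    { domain = 'your-instance.example', token = '<admin API token>' },
]

# Read allowlists from these URLs
allowlist_url_sources = [
    { url = 'file:///path/to/allowlist.csv', format = 'csv' },
]
```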
@ -286,6 +287,24 @@ mergeplan.
Once the follow count drops to 0 on your instance, the tool will automatically
use the highest severity it finds again (if you're using the `max` mergeplan).
### Allowlists
Sometimes you might want to completely ignore the blocklist definitions for
certain domains. That's what allowlists are for.
Allowlists remove any domain they contain from the merged list of blocks before
that list is saved to a file or pushed to any instance.
Allowlists can be in any format supported by `blocklist_url_sources`, but every
field other than `domain` is ignored.
You can also allow domains on the commandline by using the `-A` or `--allow`
flag and providing the domain name to allow. You can use the flag multiple
times to allow multiple domains.
It is probably wise to include your own instance domain in an allowlist so you
don't accidentally defederate from yourself.
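For example, an allowlist source entry in the config file mirrors the shape of a
`blocklist_url_sources` entry, so the same formats and parsers apply. The path
below is a placeholder:

```toml
allowlist_url_sources = [
    # A file with at least a `domain` field per entry; all other fields are ignored
    { url = 'file:///path/to/allowlist.csv', format = 'csv' },
]
```

On the commandline, `-A your-instance.example` has the same effect for a single
domain.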
## More advanced configuration
For a list of possible configuration options, check the `--help` and read the


@ -16,7 +16,7 @@ blocklist_instance_sources = [
# max_severity tells the parser to override any severities that are higher than this value
# import_fields tells the parser to only import that set of fields from a specific source
blocklist_url_sources = [
# { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
# { url = 'file:///path/to/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
{ url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
]
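As the comments above note, an individual URL source can also carry a
`max_severity` cap and an `import_fields` list. A hypothetical entry using both
(the URL and values are purely illustrative) might look like:

```toml
blocklist_url_sources = [
    # Cap severities from this list at 'silence' and restrict which extra fields are imported
    { url = 'https://example.org/blocklist.csv', format = 'csv', max_severity = 'silence', import_fields = ['reject_media', 'reject_reports'] },
]
```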


@ -1,6 +1,6 @@
[project]
name = "fediblockhole"
version = "0.4.0"
version = "0.4.1"
description = "Federated blocklist management for Mastodon"
readme = "README.md"
license = {file = "LICENSE"}

setup.cfg (new file, 2 additions)

@ -0,0 +1,2 @@
[pytest]
norecursedirs=tests/helpers


@ -35,10 +35,13 @@ API_CALL_DELAY = 5 * 60 / 300 # 300 calls per 5 minutes
# We always import the domain and the severity
IMPORT_FIELDS = ['domain', 'severity']
# Allowlists always import these fields
ALLOWLIST_IMPORT_FIELDS = ['domain', 'severity', 'public_comment', 'private_comment', 'reject_media', 'reject_reports', 'obfuscate']
# We always export the domain and the severity
EXPORT_FIELDS = ['domain', 'severity']
def sync_blocklists(conf: dict):
def sync_blocklists(conf: argparse.Namespace):
"""Sync instance blocklists from remote sources.
@param conf: The merged configuration, as an argparse.Namespace
@ -69,6 +72,12 @@ def sync_blocklists(conf: dict):
# Merge blocklists into an update dict
merged = merge_blocklists(blocklists, conf.mergeplan)
# Remove items listed in allowlists, if any
allowlists = fetch_allowlists(conf)
merged = apply_allowlists(merged, conf, allowlists)
# Save the final mergelist, if requested
if conf.blocklist_savefile:
log.info(f"Saving merged blocklist to {conf.blocklist_savefile}")
save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields)
@ -82,6 +91,35 @@ def sync_blocklists(conf: dict):
max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)
def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict):
"""Apply allowlists
"""
# Apply allows specified on the commandline
for domain in conf.allow_domains:
log.info(f"'{domain}' allowed by commandline, removing any blocks...")
if domain in merged:
del merged[domain]
# Apply allows from URLs lists
log.info("Removing domains from URL allowlists...")
for key, alist in allowlists.items():
log.debug(f"Processing allows from '{key}'...")
for allowed in alist:
domain = allowed.domain
log.debug(f"Removing allowlisted domain '{domain}' from merged list.")
if domain in merged:
del merged[domain]
return merged
def fetch_allowlists(conf: argparse.Namespace) -> dict:
"""
"""
if conf.allowlist_url_sources:
allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS)
return allowlists
return {}
def fetch_from_urls(blocklists: dict, url_sources: dict,
import_fields: list=IMPORT_FIELDS,
save_intermediate: bool=False,
@ -142,9 +180,12 @@ def fetch_from_instances(blocklists: dict, sources: dict,
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
"""Merge fetched remote blocklists into a bulk update
@param blocklists: A dict of lists of DomainBlocks, keyed by source.
Each value is a list of DomainBlocks
@param mergeplan: An optional method of merging overlapping block definitions
'max' (the default) uses the highest severity block found
'min' uses the lowest severity block found
@param returns: A dict of DomainBlocks keyed by domain
"""
merged = {}
@ -435,7 +476,7 @@ def update_known_block(token: str, host: str, block: DomainBlock):
response = requests.put(url,
headers=requests_headers(token),
data=blockdata,
json=blockdata._asdict(),
timeout=REQUEST_TIMEOUT
)
if response.status_code != 200:
@ -444,14 +485,14 @@ def update_known_block(token: str, host: str, block: DomainBlock):
def add_block(token: str, host: str, blockdata: DomainBlock):
"""Block a domain on Mastodon host
"""
log.debug(f"Blocking domain {blockdata.domain} at {host}...")
log.debug(f"Adding block entry for {blockdata.domain} at {host}...")
api_path = "/api/v1/admin/domain_blocks"
url = f"https://{host}{api_path}"
response = requests.post(url,
headers=requests_headers(token),
data=blockdata._asdict(),
json=blockdata._asdict(),
timeout=REQUEST_TIMEOUT
)
if response.status_code == 422:
@ -517,6 +558,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
log.info(f"Pushing new block definition: {newblock}")
blockdata = oldblock.copy()
blockdata.update(newblock)
log.debug(f"Block as dict: {blockdata._asdict()}")
if not dryrun:
update_known_block(token, host, blockdata)
# add a pause here so we don't melt the instance
@ -532,6 +575,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
# This is a new block for the target instance, so we
# need to add a block rather than update an existing one
log.info(f"Adding new block: {newblock}...")
log.debug(f"Block as dict: {newblock._asdict()}")
# Make sure the new block doesn't clobber a domain with followers
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
@ -629,6 +673,7 @@ def augment_args(args, tomldata: str=None):
args.blocklist_url_sources = conf.get('blocklist_url_sources', [])
args.blocklist_instance_sources = conf.get('blocklist_instance_sources', [])
args.allowlist_url_sources = conf.get('allowlist_url_sources', [])
args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations', [])
return args
@ -650,6 +695,7 @@ def setup_argparse():
ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.")
ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.")
ap.add_argument('-A', '--allow', dest="allow_domains", action='append', default=[], help="Override any blocks to allow this domain.")
ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.")
ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.")


@ -97,9 +97,10 @@ class BlocklistParserCSV(BlocklistParser):
origitem = blockitem.copy()
for key in origitem:
if key not in self.import_fields:
log.debug(f"ignoring field '{key}'")
del blockitem[key]
# Convert dict to NamedTuple with the double-star operator
# Convert dict to DomainBlock with the double-star operator
# See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments
block = DomainBlock(**blockitem)
if block.severity > self.max_severity:
@ -162,7 +163,7 @@ def str2bool(boolstring: str) -> bool:
boolstring = boolstring.lower()
if boolstring in ['true', 't', '1', 'y', 'yes']:
return True
elif boolstring in ['false', 'f', '0', 'n', 'no']:
elif boolstring in ['', 'false', 'f', '0', 'n', 'no']:
return False
else:
raise ValueError(f"Cannot parse value '{boolstring}' as boolean")
@ -183,4 +184,5 @@ def parse_blocklist(
"""Parse a blocklist in the given format
"""
parser = FORMAT_PARSERS[format](import_fields, max_severity)
log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...")
return parser.parse_blocklist(blockdata)


@ -127,13 +127,13 @@ class DomainBlock(object):
"""Initialize the DomainBlock
"""
self.domain = domain
self.severity = severity
self.public_comment = public_comment
self.private_comment = private_comment
self.reject_media = reject_media
self.reject_reports = reject_reports
self.obfuscate = obfuscate
self.id = id
self.severity = severity
@property
def severity(self):
@ -146,17 +146,12 @@ class DomainBlock(object):
else:
self._severity = BlockSeverity(sev)
# Suspend implies reject_media,reject_reports == True
if self._severity.level == SeverityLevel.SUSPEND:
self.reject_media = True
self.reject_reports = True
def _asdict(self):
"""Return a dict version of this object
"""
dictval = {
'domain': self.domain,
'severity': self.severity,
'severity': str(self.severity),
'public_comment': self.public_comment,
'private_comment': self.private_comment,
'reject_media': self.reject_media,

tests/conftest.py (new file, 3 additions)

@ -0,0 +1,3 @@
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), 'helpers'))


tests/helpers/util.py (new file, 11 additions)

@ -0,0 +1,11 @@
""" Utility functions for tests
"""
from fediblockhole import setup_argparse, augment_args
def shim_argparse(testargv: list=[], tomldata: str=None):
"""Helper function to parse test args
"""
ap = setup_argparse()
args = ap.parse_args(testargv)
args = augment_args(args, tomldata)
return args

tests/test_allowlist.py (new file, 49 additions)

@ -0,0 +1,49 @@
""" Test allowlists
"""
import pytest
from util import shim_argparse
from fediblockhole.const import DomainBlock
from fediblockhole import fetch_allowlists, apply_allowlists
def test_cmdline_allow_removes_domain():
"""Test that -A <domain> removes entries from merged
"""
conf = shim_argparse(['-A', 'removeme.org'])
merged = {
'example.org': DomainBlock('example.org'),
'example2.org': DomainBlock('example2.org'),
'removeme.org': DomainBlock('removeme.org'),
'keepblockingme.org': DomainBlock('keepblockingme.org'),
}
# allowlists = {
# 'testlist': [ DomainBlock('removeme.org', 'noop'), ]
# }
merged = apply_allowlists(merged, conf, {})
with pytest.raises(KeyError):
merged['removeme.org']
def test_allowlist_removes_domain():
"""Test that an item in an allowlist removes entries from merged
"""
conf = shim_argparse()
merged = {
'example.org': DomainBlock('example.org'),
'example2.org': DomainBlock('example2.org'),
'removeme.org': DomainBlock('removeme.org'),
'keepblockingme.org': DomainBlock('keepblockingme.org'),
}
allowlists = {
'testlist': [ DomainBlock('removeme.org', 'noop'), ]
}
merged = apply_allowlists(merged, conf, allowlists)
with pytest.raises(KeyError):
merged['removeme.org']


@ -1,15 +1,8 @@
"""Test the commandline defined parameters correctly
"""
from util import shim_argparse
from fediblockhole import setup_argparse, augment_args
def shim_argparse(testargv: list=[], tomldata: str=None):
"""Helper function to parse test args
"""
ap = setup_argparse()
args = ap.parse_args(testargv)
args = augment_args(args, tomldata)
return args
def test_cmdline_no_configfile():
""" Test bare command with no configfile
"""
@ -38,3 +31,17 @@ def test_cmdline_mergeplan_min():
args = ap.parse_args(['-m', 'min'])
assert args.mergeplan == 'min'
def test_set_allow_domain():
"""Set a single allow domain on commandline"""
ap = setup_argparse()
args = ap.parse_args(['-A', 'example.org'])
assert args.allow_domains == ['example.org']
def test_set_multiple_allow_domains():
"""Set multiple allow domains on commandline"""
ap = setup_argparse()
args = ap.parse_args(['-A', 'example.org', '-A', 'example2.org', '-A', 'example3.org'])
assert args.allow_domains == ['example.org', 'example2.org', 'example3.org']


@ -1,15 +1,8 @@
"""Test the config file is loading parameters correctly
"""
from util import shim_argparse
from fediblockhole import setup_argparse, augment_args
def shim_argparse(testargv: list=[], tomldata: str=None):
"""Helper function to parse test args
"""
ap = setup_argparse()
args = ap.parse_args(testargv)
args = augment_args(args, tomldata)
return args
def test_parse_tomldata():
tomldata = """
# Test TOML config for FediBlockHole
@ -45,3 +38,14 @@ def test_set_mergeplan_min():
assert args.mergeplan == 'min'
def test_set_allowlists():
tomldata = """# Comment on config
allowlist_url_sources = [ { url='file:///path/to/allowlist', format='csv'} ]
"""
args = shim_argparse([], tomldata)
assert args.mergeplan == 'max'
assert args.allowlist_url_sources == [{
'url': 'file:///path/to/allowlist',
'format': 'csv',
}]


@ -72,12 +72,3 @@ def test_compare_diff_sevs_2():
b = DomainBlock('example1.org', 'noop')
assert a != b
def test_suspend_rejects():
"""A suspend should reject_media and reject_reports
"""
a = DomainBlock('example.org', 'suspend')
assert a.severity.level == SeverityLevel.SUSPEND
assert a.reject_media == True
assert a.reject_reports == True