Removed redundant global vars.

Refactored fetch from URLs and instances.
Change API delay to be in calls per hour.
Improved check_followed_severity() behaviour.
Updated sample config to include new format.
Added support for per-instance-source import_fields.
BlockSeverity of 'suspend' implies reject_media and reject_reports.
This commit is contained in:
Justin Warren 2023-01-12 09:09:06 +11:00
parent 66f0373cbe
commit 327a44d907
No known key found for this signature in database
5 changed files with 124 additions and 74 deletions

View File

@ -3,6 +3,8 @@
# Otherwise, `token` is a Bearer token authorised to read domain_blocks.
# If `admin` = True, use the more detailed admin API, which requires a token with a
# higher level of authorization.
# If `import_fields` are provided, only import these fields from the instance.
# Overrides the global `import_fields` setting.
blocklist_instance_sources = [
# { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
# { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
@ -10,9 +12,13 @@ blocklist_instance_sources = [
]
# List of URLs to read csv blocklists from
# Format tells the parser which format to use when parsing the blocklist
# max_severity tells the parser to override any severities that are higher than this value
# import_fields tells the parser to only import that set of fields from a specific source
blocklist_url_sources = [
# 'file:///etc/fediblockhole/blocklist-01.csv',
'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv',
# { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
{ url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
]
# List of instances to write blocklist to

View File

@ -25,29 +25,17 @@ log = logging.getLogger('fediblockhole')
# Max size of a URL-fetched blocklist
URL_BLOCKLIST_MAXSIZE = 1024 ** 3
# The relative severity levels of blocks
SEVERITY = {
'noop': 0,
'silence': 1,
'suspend': 2,
}
# Default for 'reject_media' setting for each severity level
REJECT_MEDIA_DEFAULT = {
'noop': False,
'silence': True,
'suspend': True,
}
# Default for 'reject_reports' setting for each severity level
REJECT_REPORTS_DEFAULT = {
'noop': False,
'silence': True,
'suspend': True,
}
# Wait at most this long for a remote server to respond
REQUEST_TIMEOUT=30
REQUEST_TIMEOUT = 30
# Time to wait between instance API calls to we don't melt them
API_CALL_DELAY = 3600 / 300 # 300 API calls per hour
# We always import the domain and the severity
IMPORT_FIELDS = ['domain', 'severity']
# We always export the domain and the severity
EXPORT_FIELDS = ['domain', 'severity']
def sync_blocklists(conf: dict):
"""Sync instance blocklists from remote sources.
@ -58,40 +46,25 @@ def sync_blocklists(conf: dict):
# We will merge these later using a merge algorithm we choose.
# Always import these fields
import_fields = ['domain', 'severity']
import_fields = IMPORT_FIELDS
# Add extra import fields if defined in config
import_fields.extend(conf.import_fields)
# Always export these fields
export_fields = ['domain', 'severity']
export_fields = EXPORT_FIELDS
# Add extra export fields if defined in config
export_fields.extend(conf.export_fields)
blocklists = {}
# Fetch blocklists from URLs
if not conf.no_fetch_url:
log.info("Fetching domain blocks from URLs...")
for listurl in conf.blocklist_url_sources:
url = listurl['url']
max_severity = listurl.get('max_severity', 'suspend')
listformat = listurl.get('format', 'csv')
with urlr.urlopen(url) as fp:
rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
if conf.save_intermediate:
save_intermediate_blocklist(blocklists[url], url, conf.savedir, export_fields)
blocklists = fetch_from_urls(blocklists, conf.blocklist_url_sources,
import_fields, conf.save_intermediate, conf.savedir, export_fields)
# Fetch blocklists from remote instances
if not conf.no_fetch_instance:
log.info("Fetching domain blocks from instances...")
for blocklist_src in conf.blocklist_instance_sources:
domain = blocklist_src['domain']
admin = blocklist_src.get('admin', False)
token = blocklist_src.get('token', None)
blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
if conf.save_intermediate:
save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
blocklists = fetch_from_instances(blocklists, conf.blocklist_instance_sources,
import_fields, conf.save_intermediate, conf.savedir, export_fields)
# Merge blocklists into an update dict
merged = merge_blocklists(blocklists, conf.mergeplan)
@ -108,9 +81,64 @@ def sync_blocklists(conf: dict):
max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)
def fetch_from_urls(blocklists: dict, url_sources: dict,
import_fields: list=IMPORT_FIELDS,
save_intermediate: bool=False,
savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
"""Fetch blocklists from URL sources
@param blocklists: A dict of existing blocklists, keyed by source
@param url_sources: A dict of configuration info for url sources
@returns: A dict of blocklists, same as input, but (possibly) modified
"""
log.info("Fetching domain blocks from URLs...")
for item in url_sources:
url = item['url']
# If import fields are provided, they override the global ones passed in
source_import_fields = item.get('import_fields', None)
if source_import_fields:
# Ensure we always use the default fields
import_fields = IMPORT_FIELDS.extend(source_import_fields)
max_severity = item.get('max_severity', 'suspend')
listformat = item.get('format', 'csv')
with urlr.urlopen(url) as fp:
rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
if save_intermediate:
save_intermediate_blocklist(blocklists[url], url, savedir, export_fields)
return blocklists
def fetch_from_instances(blocklists: dict, sources: dict,
import_fields: list=IMPORT_FIELDS,
save_intermediate: bool=False,
savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
"""Fetch blocklists from other instances
@param blocklists: A dict of existing blocklists, keyed by source
@param url_sources: A dict of configuration info for url sources
@returns: A dict of blocklists, same as input, but (possibly) modified
"""
log.info("Fetching domain blocks from instances...")
for item in sources:
domain = item['domain']
admin = item.get('admin', False)
token = item.get('token', None)
# If import fields are provided, they override the global ones passed in
source_import_fields = item.get('import_fields', None)
if source_import_fields:
# Ensure we always use the default fields
import_fields = IMPORT_FIELDS.extend(source_import_fields)
# Add the blocklist with the domain as the source key
blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
if save_intermediate:
save_intermediate_blocklist(blocklists[domain], domain, savedir, export_fields)
return blocklists
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
"""Merge fetched remote blocklists into a bulk update
@param mergeplan: An optional method of merging overlapping block definitions
'max' (the default) uses the highest severity block found
'min' uses the lowest severity block found
@ -367,13 +395,16 @@ def check_followed_severity(host: str, token: str, domain: str,
max_followed_severity: BlockSeverity=BlockSeverity('silence')):
"""Check an instance to see if it has followers of a to-be-blocked instance"""
log.debug("Checking followed severity...")
# Return straight away if we're not increasing the severity
if severity <= max_followed_severity:
return severity
# If the instance has accounts that follow people on the to-be-blocked domain,
# limit the maximum severity to the configured `max_followed_severity`.
log.debug("checking for instance follows...")
follows = fetch_instance_follows(token, host, domain)
time.sleep(API_CALL_DELAY)
if follows > 0:
log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
if severity > max_followed_severity:
@ -465,15 +496,17 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
# Is the severity changing?
if 'severity' in change_needed:
# Confirm if we really want to change the severity
# If we still have followers of the remote domain, we may not
# want to go all the way to full suspend, depending on the configuration
newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
if newseverity != oldblock.severity:
newblock.severity = newseverity
else:
log.info("Keeping severity of block the same to avoid disrupting followers.")
change_needed.remove('severity')
log.debug("Severity change requested, checking...")
if newseverity > oldblock.severity:
# Confirm if we really want to change the severity
# If we still have followers of the remote domain, we may not
# want to go all the way to full suspend, depending on the configuration
newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
if newseverity != oldblock.severity:
newblock.severity = newseverity
else:
log.info("Keeping severity of block the same to avoid disrupting followers.")
change_needed.remove('severity')
if change_needed:
log.info(f"Change detected. Need to update {change_needed} for domain block for {oldblock.domain}")
@ -484,7 +517,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
if not dryrun:
update_known_block(token, host, blockdata)
# add a pause here so we don't melt the instance
time.sleep(1)
time.sleep(API_CALL_DELAY)
else:
log.info("Dry run selected. Not applying changes.")
@ -495,24 +528,14 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
else:
# This is a new block for the target instance, so we
# need to add a block rather than update an existing one
# blockdata = {
# 'domain': newblock.domain,
# # Default to Silence if nothing is specified
# 'severity': newblock.get('severity', 'silence'),
# 'public_comment': newblock.get('public_comment', ''),
# 'private_comment': newblock.get('private_comment', ''),
# 'reject_media': newblock.get('reject_media', False),
# 'reject_reports': newblock.get('reject_reports', False),
# 'obfuscate': newblock.get('obfuscate', False),
# }
log.info(f"Adding new block: {newblock}...")
# Make sure the new block doesn't clobber a domain with followers
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
log.info(f"Adding new block: {newblock}...")
if not dryrun:
add_block(token, host, newblock)
# add a pause here so we don't melt the instance
time.sleep(1)
time.sleep(API_CALL_DELAY)
else:
log.info("Dry run selected. Not adding block.")

View File

@ -133,11 +133,23 @@ class DomainBlock(object):
self.reject_reports = reject_reports
self.obfuscate = obfuscate
self.id = id
self.severity = severity
if isinstance(severity, BlockSeverity):
self.severity = severity
@property
def severity(self):
return self._severity
@severity.setter
def severity(self, sev):
if isinstance(sev, BlockSeverity):
self._severity = sev
else:
self.severity = BlockSeverity(severity)
self._severity = BlockSeverity(sev)
# Suspend implies reject_media,reject_reports == True
if self._severity.level == SeverityLevel.SUSPEND:
self.reject_media = True
self.reject_reports = True
def _asdict(self):
"""Return a dict version of this object

View File

@ -72,3 +72,12 @@ def test_compare_diff_sevs_2():
b = DomainBlock('example1.org', 'noop')
assert a != b
def test_suspend_rejects():
"""A suspend should reject_media and reject_reports
"""
a = DomainBlock('example.org', 'suspend')
assert a.severity.level == SeverityLevel.SUSPEND
assert a.reject_media == True
assert a.reject_reports == True

View File

@ -231,11 +231,11 @@ def test_mergeplan_same_min_bools_true():
assert r.obfuscate == True
def test_mergeplan_max_bools():
a = DomainBlock('example.org', 'suspend', '', '', True, False, True)
a = DomainBlock('example.org', 'suspend', '', '', True, True, True)
b = DomainBlock('example.org', 'noop', '', '', False, False, False)
r = apply_mergeplan(a, b, 'max')
assert r.reject_media == True
assert r.reject_reports == False
assert r.reject_reports == True
assert r.obfuscate == True