Removed redundant global vars.

Refactored fetching from URLs and instances into separate functions.
Changed the API delay to be expressed in calls per hour.
Improved check_followed_severity() behaviour.
Updated sample config to include new format.
Added support for per-instance-source import_fields.
BlockSeverity of 'suspend' implies reject_media and reject_reports.
Justin Warren 2023-01-12 09:09:06 +11:00
parent 66f0373cbe
commit 327a44d907
5 changed files with 124 additions and 74 deletions
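
In plain terms, the delay change replaces the fixed one-second pause with a pause derived from a target call rate. A minimal sketch of that conversion, using the 300-calls-per-hour figure from the diff below; the rate_limited helper is hypothetical, not part of fediblockhole:

import time

API_CALLS_PER_HOUR = 300                    # example budget from the diff
API_CALL_DELAY = 3600 / API_CALLS_PER_HOUR  # 12 seconds between calls

def rate_limited(fn, *args, **kwargs):
    """Hypothetical helper: call fn, then pause to stay under the hourly budget."""
    result = fn(*args, **kwargs)
    time.sleep(API_CALL_DELAY)
    return result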

View File

@@ -3,6 +3,8 @@
 # Otherwise, `token` is a Bearer token authorised to read domain_blocks.
 # If `admin` = True, use the more detailed admin API, which requires a token with a
 # higher level of authorization.
+# If `import_fields` are provided, only import these fields from the instance.
+# Overrides the global `import_fields` setting.
 blocklist_instance_sources = [
   # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
   # { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
@@ -10,9 +12,13 @@ blocklist_instance_sources = [
 ]

 # List of URLs to read csv blocklists from
+# format tells the parser which format to use when parsing the blocklist
+# max_severity tells the parser to cap any severities higher than this value
+# import_fields tells the parser to import only the listed fields from this source
 blocklist_url_sources = [
-  # 'file:///etc/fediblockhole/blocklist-01.csv',
-  'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv',
+  # { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
+  { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
 ]

 # List of instances to write blocklist to
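
A minimal sketch of how one of these source entries might be handled after the TOML is parsed, mirroring the override rule described above; effective_import_fields and the example values are hypothetical, not fediblockhole API:

IMPORT_FIELDS = ['domain', 'severity']   # mandatory defaults, as in the diff

def effective_import_fields(source: dict, global_fields: list) -> list:
    """Hypothetical helper: pick the fields to import for one source entry."""
    per_source = source.get('import_fields')
    if per_source:
        # A per-source setting overrides the globals but keeps the defaults
        return IMPORT_FIELDS + [f for f in per_source if f not in IMPORT_FIELDS]
    return global_fields

# One parsed entry from blocklist_url_sources (values are illustrative)
source = {
    'url': 'https://example.org/blocklist.csv',
    'format': 'csv',
    'max_severity': 'silence',
    'import_fields': ['reject_media', 'reject_reports'],
}
print(effective_import_fields(source, IMPORT_FIELDS + ['public_comment']))
# ['domain', 'severity', 'reject_media', 'reject_reports']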

View File

@@ -25,29 +25,17 @@ log = logging.getLogger('fediblockhole')
 # Max size of a URL-fetched blocklist
 URL_BLOCKLIST_MAXSIZE = 1024 ** 3

-# The relative severity levels of blocks
-SEVERITY = {
-    'noop': 0,
-    'silence': 1,
-    'suspend': 2,
-}
-# Default for 'reject_media' setting for each severity level
-REJECT_MEDIA_DEFAULT = {
-    'noop': False,
-    'silence': True,
-    'suspend': True,
-}
-# Default for 'reject_reports' setting for each severity level
-REJECT_REPORTS_DEFAULT = {
-    'noop': False,
-    'silence': True,
-    'suspend': True,
-}

 # Wait at most this long for a remote server to respond
-REQUEST_TIMEOUT=30
+REQUEST_TIMEOUT = 30

+# Time to wait between instance API calls so we don't melt them
+API_CALL_DELAY = 3600 / 300 # 300 API calls per hour
+
+# We always import the domain and the severity
+IMPORT_FIELDS = ['domain', 'severity']
+# We always export the domain and the severity
+EXPORT_FIELDS = ['domain', 'severity']

 def sync_blocklists(conf: dict):
     """Sync instance blocklists from remote sources.
@@ -58,40 +46,25 @@ def sync_blocklists(conf: dict):
     # We will merge these later using a merge algorithm we choose.

     # Always import these fields
-    import_fields = ['domain', 'severity']
+    import_fields = IMPORT_FIELDS
     # Add extra import fields if defined in config
     import_fields.extend(conf.import_fields)

     # Always export these fields
-    export_fields = ['domain', 'severity']
+    export_fields = EXPORT_FIELDS
     # Add extra export fields if defined in config
     export_fields.extend(conf.export_fields)

     blocklists = {}
     # Fetch blocklists from URLs
     if not conf.no_fetch_url:
-        log.info("Fetching domain blocks from URLs...")
-        for listurl in conf.blocklist_url_sources:
-            url = listurl['url']
-            max_severity = listurl.get('max_severity', 'suspend')
-            listformat = listurl.get('format', 'csv')
-            with urlr.urlopen(url) as fp:
-                rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
-                blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
-            if conf.save_intermediate:
-                save_intermediate_blocklist(blocklists[url], url, conf.savedir, export_fields)
+        blocklists = fetch_from_urls(blocklists, conf.blocklist_url_sources,
+            import_fields, conf.save_intermediate, conf.savedir, export_fields)

     # Fetch blocklists from remote instances
     if not conf.no_fetch_instance:
-        log.info("Fetching domain blocks from instances...")
-        for blocklist_src in conf.blocklist_instance_sources:
-            domain = blocklist_src['domain']
-            admin = blocklist_src.get('admin', False)
-            token = blocklist_src.get('token', None)
-            blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
-            if conf.save_intermediate:
-                save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
+        blocklists = fetch_from_instances(blocklists, conf.blocklist_instance_sources,
+            import_fields, conf.save_intermediate, conf.savedir, export_fields)

     # Merge blocklists into an update dict
     merged = merge_blocklists(blocklists, conf.mergeplan)
@@ -108,9 +81,64 @@ def sync_blocklists(conf: dict):
         max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
         push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)

+def fetch_from_urls(blocklists: dict, url_sources: dict,
+        import_fields: list=IMPORT_FIELDS,
+        save_intermediate: bool=False,
+        savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
+    """Fetch blocklists from URL sources
+    @param blocklists: A dict of existing blocklists, keyed by source
+    @param url_sources: A dict of configuration info for url sources
+    @returns: A dict of blocklists, same as input, but (possibly) modified
+    """
+    log.info("Fetching domain blocks from URLs...")
+
+    for item in url_sources:
+        url = item['url']
+        # If import fields are provided, they override the global ones passed in
+        source_import_fields = item.get('import_fields', None)
+        if source_import_fields:
+            # Ensure we always import the default fields as well
+            import_fields = IMPORT_FIELDS + source_import_fields
+
+        max_severity = item.get('max_severity', 'suspend')
+        listformat = item.get('format', 'csv')
+        with urlr.urlopen(url) as fp:
+            rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
+            blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
+
+        if save_intermediate:
+            save_intermediate_blocklist(blocklists[url], url, savedir, export_fields)
+
+    return blocklists
+
+def fetch_from_instances(blocklists: dict, sources: dict,
+        import_fields: list=IMPORT_FIELDS,
+        save_intermediate: bool=False,
+        savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
+    """Fetch blocklists from other instances
+    @param blocklists: A dict of existing blocklists, keyed by source
+    @param sources: A dict of configuration info for instance sources
+    @returns: A dict of blocklists, same as input, but (possibly) modified
+    """
+    log.info("Fetching domain blocks from instances...")
+
+    for item in sources:
+        domain = item['domain']
+        admin = item.get('admin', False)
+        token = item.get('token', None)
+        # If import fields are provided, they override the global ones passed in
+        source_import_fields = item.get('import_fields', None)
+        if source_import_fields:
+            # Ensure we always import the default fields as well
+            import_fields = IMPORT_FIELDS + source_import_fields
+
+        # Add the blocklist with the domain as the source key
+        blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
+
+        if save_intermediate:
+            save_intermediate_blocklist(blocklists[domain], domain, savedir, export_fields)
+
+    return blocklists
+
 def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
     """Merge fetched remote blocklists into a bulk update
     @param mergeplan: An optional method of merging overlapping block definitions
         'max' (the default) uses the highest severity block found
         'min' uses the lowest severity block found
@@ -367,13 +395,16 @@ def check_followed_severity(host: str, token: str, domain: str,
     max_followed_severity: BlockSeverity=BlockSeverity('silence')):
     """Check an instance to see if it has followers of a to-be-blocked instance"""

+    log.debug("Checking followed severity...")
     # Return straight away if we're not increasing the severity
     if severity <= max_followed_severity:
         return severity

     # If the instance has accounts that follow people on the to-be-blocked domain,
     # limit the maximum severity to the configured `max_followed_severity`.
+    log.debug("checking for instance follows...")
     follows = fetch_instance_follows(token, host, domain)
+    time.sleep(API_CALL_DELAY)
     if follows > 0:
         log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
         if severity > max_followed_severity:
@@ -465,15 +496,17 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],

             # Is the severity changing?
             if 'severity' in change_needed:
-                # Confirm if we really want to change the severity
-                # If we still have followers of the remote domain, we may not
-                # want to go all the way to full suspend, depending on the configuration
-                newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
-                if newseverity != oldblock.severity:
-                    newblock.severity = newseverity
-                else:
-                    log.info("Keeping severity of block the same to avoid disrupting followers.")
-                    change_needed.remove('severity')
+                log.debug("Severity change requested, checking...")
+                if newblock.severity > oldblock.severity:
+                    # Confirm if we really want to change the severity
+                    # If we still have followers of the remote domain, we may not
+                    # want to go all the way to full suspend, depending on the configuration
+                    newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
+                    if newseverity != oldblock.severity:
+                        newblock.severity = newseverity
+                    else:
+                        log.info("Keeping severity of block the same to avoid disrupting followers.")
+                        change_needed.remove('severity')

             if change_needed:
                 log.info(f"Change detected. Need to update {change_needed} for domain block for {oldblock.domain}")
@@ -484,7 +517,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
                 if not dryrun:
                     update_known_block(token, host, blockdata)
                     # add a pause here so we don't melt the instance
-                    time.sleep(1)
+                    time.sleep(API_CALL_DELAY)
                 else:
                     log.info("Dry run selected. Not applying changes.")
@@ -495,24 +528,14 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
         else:
             # This is a new block for the target instance, so we
             # need to add a block rather than update an existing one
-            # blockdata = {
-            #     'domain': newblock.domain,
-            #     # Default to Silence if nothing is specified
-            #     'severity': newblock.get('severity', 'silence'),
-            #     'public_comment': newblock.get('public_comment', ''),
-            #     'private_comment': newblock.get('private_comment', ''),
-            #     'reject_media': newblock.get('reject_media', False),
-            #     'reject_reports': newblock.get('reject_reports', False),
-            #     'obfuscate': newblock.get('obfuscate', False),
-            # }
+            log.info(f"Adding new block: {newblock}...")

             # Make sure the new block doesn't clobber a domain with followers
             newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
-            log.info(f"Adding new block: {newblock}...")

             if not dryrun:
                 add_block(token, host, newblock)
                 # add a pause here so we don't melt the instance
-                time.sleep(1)
+                time.sleep(API_CALL_DELAY)
             else:
                 log.info("Dry run selected. Not adding block.")
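
Reduced to its core, the check_followed_severity() flow above only consults the remote instance when a block would exceed the configured cap, and keeps the cap while local accounts still follow the target domain. A self-contained sketch with plain integers (0=noop, 1=silence, 2=suspend) standing in for BlockSeverity; capped_severity is a hypothetical stand-in:

def capped_severity(severity: int, max_followed: int, follow_count: int) -> int:
    """Stand-in for check_followed_severity(): cap a block's severity
    if the instance still has followers of the to-be-blocked domain."""
    # Return straight away if we're not exceeding the configured cap
    if severity <= max_followed:
        return severity
    # Followers exist, so don't raise the block past the cap
    if follow_count > 0:
        return max_followed
    return severity

# A requested suspend (2) is capped to silence (1) while followers remain
assert capped_severity(2, 1, follow_count=5) == 1
assert capped_severity(2, 1, follow_count=0) == 2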

View File

@@ -133,11 +133,23 @@ class DomainBlock(object):
         self.reject_reports = reject_reports
         self.obfuscate = obfuscate
         self.id = id
-        if isinstance(severity, BlockSeverity):
-            self.severity = severity
-        else:
-            self.severity = BlockSeverity(severity)
+        self.severity = severity
+
+    @property
+    def severity(self):
+        return self._severity
+
+    @severity.setter
+    def severity(self, sev):
+        if isinstance(sev, BlockSeverity):
+            self._severity = sev
+        else:
+            self._severity = BlockSeverity(sev)
+
+        # Suspend implies reject_media,reject_reports == True
+        if self._severity.level == SeverityLevel.SUSPEND:
+            self.reject_media = True
+            self.reject_reports = True

     def _asdict(self):
         """Return a dict version of this object

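The severity property above, as a self-contained sketch: SeverityLevel and Block here are simplified stand-ins for fediblockhole's BlockSeverity and DomainBlock, just to show how assigning a suspend severity drags both reject flags along:

from enum import IntEnum

class SeverityLevel(IntEnum):    # stand-in for fediblockhole's enum
    NOOP = 0
    SILENCE = 1
    SUSPEND = 2

class Block:                     # stand-in for DomainBlock
    def __init__(self, severity='noop'):
        self.reject_media = False
        self.reject_reports = False
        self.severity = severity  # runs the setter below

    @property
    def severity(self):
        return self._severity

    @severity.setter
    def severity(self, sev):
        self._severity = SeverityLevel[sev.upper()] if isinstance(sev, str) else sev
        # Suspend implies rejecting media and reports as well
        if self._severity == SeverityLevel.SUSPEND:
            self.reject_media = True
            self.reject_reports = True

b = Block('suspend')
assert b.reject_media and b.reject_reports
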
View File

@@ -71,4 +71,13 @@ def test_compare_diff_sevs_2():
     a = DomainBlock('example1.org', 'suspend')
     b = DomainBlock('example1.org', 'noop')

     assert a != b
+
+def test_suspend_rejects():
+    """A suspend should reject_media and reject_reports
+    """
+    a = DomainBlock('example.org', 'suspend')
+
+    assert a.severity.level == SeverityLevel.SUSPEND
+    assert a.reject_media == True
+    assert a.reject_reports == True

View File

@@ -231,11 +231,11 @@ def test_mergeplan_same_min_bools_true():
     assert r.obfuscate == True

 def test_mergeplan_max_bools():
-    a = DomainBlock('example.org', 'suspend', '', '', True, False, True)
+    a = DomainBlock('example.org', 'suspend', '', '', True, True, True)
     b = DomainBlock('example.org', 'noop', '', '', False, False, False)

     r = apply_mergeplan(a, b, 'max')

     assert r.reject_media == True
-    assert r.reject_reports == False
+    assert r.reject_reports == True
     assert r.obfuscate == True
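
Both expectations flip as a direct consequence of the new setter: constructing block a with severity 'suspend' now forces reject_media and reject_reports to True, so the False passed for reject_reports cannot survive, and the 'max' mergeplan then carries True through to the result.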