Removed redundant global vars.
Refactored fetch from URLs and instances. Change API delay to be in calls per hour. Improved check_followed_severity() behaviour. Updated sample config to include new format. Added support for per-instance-source import_fields. BlockSeverity of 'suspend' implies reject_media and reject_reports.
This commit is contained in:
parent
66f0373cbe
commit
327a44d907
|
@ -3,6 +3,8 @@
|
|||
# Otherwise, `token` is a Bearer token authorised to read domain_blocks.
|
||||
# If `admin` = True, use the more detailed admin API, which requires a token with a
|
||||
# higher level of authorization.
|
||||
# If `import_fields` are provided, only import these fields from the instance.
|
||||
# Overrides the global `import_fields` setting.
|
||||
blocklist_instance_sources = [
|
||||
# { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
|
||||
# { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
|
||||
|
@ -10,9 +12,13 @@ blocklist_instance_sources = [
|
|||
]
|
||||
|
||||
# List of URLs to read csv blocklists from
|
||||
# Format tells the parser which format to use when parsing the blocklist
|
||||
# max_severity tells the parser to override any severities that are higher than this value
|
||||
# import_fields tells the parser to only import that set of fields from a specific source
|
||||
blocklist_url_sources = [
|
||||
# 'file:///etc/fediblockhole/blocklist-01.csv',
|
||||
'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv',
|
||||
# { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
|
||||
{ url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
|
||||
|
||||
]
|
||||
|
||||
# List of instances to write blocklist to
|
||||
|
|
|
@ -25,30 +25,18 @@ log = logging.getLogger('fediblockhole')
|
|||
# Max size of a URL-fetched blocklist
|
||||
URL_BLOCKLIST_MAXSIZE = 1024 ** 3
|
||||
|
||||
# The relative severity levels of blocks
|
||||
SEVERITY = {
|
||||
'noop': 0,
|
||||
'silence': 1,
|
||||
'suspend': 2,
|
||||
}
|
||||
|
||||
# Default for 'reject_media' setting for each severity level
|
||||
REJECT_MEDIA_DEFAULT = {
|
||||
'noop': False,
|
||||
'silence': True,
|
||||
'suspend': True,
|
||||
}
|
||||
|
||||
# Default for 'reject_reports' setting for each severity level
|
||||
REJECT_REPORTS_DEFAULT = {
|
||||
'noop': False,
|
||||
'silence': True,
|
||||
'suspend': True,
|
||||
}
|
||||
|
||||
# Wait at most this long for a remote server to respond
|
||||
REQUEST_TIMEOUT = 30
|
||||
|
||||
# Time to wait between instance API calls so we don't melt them
|
||||
API_CALL_DELAY = 3600 / 300 # 300 API calls per hour
|
||||
|
||||
# We always import the domain and the severity
|
||||
IMPORT_FIELDS = ['domain', 'severity']
|
||||
|
||||
# We always export the domain and the severity
|
||||
EXPORT_FIELDS = ['domain', 'severity']
|
||||
|
||||
def sync_blocklists(conf: dict):
|
||||
"""Sync instance blocklists from remote sources.
|
||||
|
||||
|
@ -58,40 +46,25 @@ def sync_blocklists(conf: dict):
|
|||
# We will merge these later using a merge algorithm we choose.
|
||||
|
||||
# Always import these fields
|
||||
import_fields = ['domain', 'severity']
|
||||
import_fields = IMPORT_FIELDS
|
||||
# Add extra import fields if defined in config
|
||||
import_fields.extend(conf.import_fields)
|
||||
|
||||
# Always export these fields
|
||||
export_fields = ['domain', 'severity']
|
||||
export_fields = EXPORT_FIELDS
|
||||
# Add extra export fields if defined in config
|
||||
export_fields.extend(conf.export_fields)
|
||||
|
||||
blocklists = {}
|
||||
# Fetch blocklists from URLs
|
||||
if not conf.no_fetch_url:
|
||||
log.info("Fetching domain blocks from URLs...")
|
||||
for listurl in conf.blocklist_url_sources:
|
||||
url = listurl['url']
|
||||
max_severity = listurl.get('max_severity', 'suspend')
|
||||
listformat = listurl.get('format', 'csv')
|
||||
with urlr.urlopen(url) as fp:
|
||||
rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
|
||||
blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
|
||||
|
||||
if conf.save_intermediate:
|
||||
save_intermediate_blocklist(blocklists[url], url, conf.savedir, export_fields)
|
||||
blocklists = fetch_from_urls(blocklists, conf.blocklist_url_sources,
|
||||
import_fields, conf.save_intermediate, conf.savedir, export_fields)
|
||||
|
||||
# Fetch blocklists from remote instances
|
||||
if not conf.no_fetch_instance:
|
||||
log.info("Fetching domain blocks from instances...")
|
||||
for blocklist_src in conf.blocklist_instance_sources:
|
||||
domain = blocklist_src['domain']
|
||||
admin = blocklist_src.get('admin', False)
|
||||
token = blocklist_src.get('token', None)
|
||||
blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
|
||||
if conf.save_intermediate:
|
||||
save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
|
||||
blocklists = fetch_from_instances(blocklists, conf.blocklist_instance_sources,
|
||||
import_fields, conf.save_intermediate, conf.savedir, export_fields)
|
||||
|
||||
# Merge blocklists into an update dict
|
||||
merged = merge_blocklists(blocklists, conf.mergeplan)
|
||||
|
@ -108,9 +81,64 @@ def sync_blocklists(conf: dict):
|
|||
max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
|
||||
push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)
|
||||
|
||||
def fetch_from_urls(blocklists: dict, url_sources: dict,
    import_fields: list=IMPORT_FIELDS,
    save_intermediate: bool=False,
    savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
    """Fetch blocklists from URL sources

    @param blocklists: A dict of existing blocklists, keyed by source
    @param url_sources: A dict of configuration info for url sources
    @param import_fields: Fields to import from each source unless overridden per-source
    @param save_intermediate: If True, save each fetched blocklist to `savedir`
    @param savedir: Directory to write intermediate blocklists into
    @param export_fields: Fields to include when saving intermediate blocklists
    @returns: A dict of blocklists, same as input, but (possibly) modified
    """
    log.info("Fetching domain blocks from URLs...")

    for item in url_sources:
        url = item['url']
        # If import fields are provided, they override the global ones passed in.
        # Use a per-item variable so one source's override doesn't leak into the
        # next iteration, and use list concatenation: list.extend() returns None,
        # so `IMPORT_FIELDS.extend(...)` would have assigned None here.
        source_import_fields = item.get('import_fields', None)
        if source_import_fields:
            # Ensure we always include the mandatory default fields
            use_fields = IMPORT_FIELDS + source_import_fields
        else:
            use_fields = import_fields

        max_severity = item.get('max_severity', 'suspend')
        listformat = item.get('format', 'csv')
        with urlr.urlopen(url) as fp:
            rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
            blocklists[url] = parse_blocklist(rawdata, listformat, use_fields, max_severity)

        if save_intermediate:
            save_intermediate_blocklist(blocklists[url], url, savedir, export_fields)

    return blocklists
|
||||
|
||||
def fetch_from_instances(blocklists: dict, sources: dict,
    import_fields: list=IMPORT_FIELDS,
    save_intermediate: bool=False,
    savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
    """Fetch blocklists from other instances

    @param blocklists: A dict of existing blocklists, keyed by source
    @param sources: A dict of configuration info for instance sources
    @param import_fields: Fields to import from each source unless overridden per-source
    @param save_intermediate: If True, save each fetched blocklist to `savedir`
    @param savedir: Directory to write intermediate blocklists into
    @param export_fields: Fields to include when saving intermediate blocklists
    @returns: A dict of blocklists, same as input, but (possibly) modified
    """
    log.info("Fetching domain blocks from instances...")
    for item in sources:
        domain = item['domain']
        admin = item.get('admin', False)
        token = item.get('token', None)
        # If import fields are provided, they override the global ones passed in.
        # Use a per-item variable so one source's override doesn't leak into the
        # next iteration, and use list concatenation: list.extend() returns None,
        # so `IMPORT_FIELDS.extend(...)` would have assigned None here.
        source_import_fields = item.get('import_fields', None)
        if source_import_fields:
            # Ensure we always include the mandatory default fields
            use_fields = IMPORT_FIELDS + source_import_fields
        else:
            use_fields = import_fields

        # Add the blocklist with the domain as the source key
        blocklists[domain] = fetch_instance_blocklist(domain, token, admin, use_fields)
        if save_intermediate:
            save_intermediate_blocklist(blocklists[domain], domain, savedir, export_fields)
    return blocklists
|
||||
|
||||
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
||||
"""Merge fetched remote blocklists into a bulk update
|
||||
|
||||
@param mergeplan: An optional method of merging overlapping block definitions
|
||||
'max' (the default) uses the highest severity block found
|
||||
'min' uses the lowest severity block found
|
||||
|
@ -367,13 +395,16 @@ def check_followed_severity(host: str, token: str, domain: str,
|
|||
max_followed_severity: BlockSeverity=BlockSeverity('silence')):
|
||||
"""Check an instance to see if it has followers of a to-be-blocked instance"""
|
||||
|
||||
log.debug("Checking followed severity...")
|
||||
# Return straight away if we're not increasing the severity
|
||||
if severity <= max_followed_severity:
|
||||
return severity
|
||||
|
||||
# If the instance has accounts that follow people on the to-be-blocked domain,
|
||||
# limit the maximum severity to the configured `max_followed_severity`.
|
||||
log.debug("checking for instance follows...")
|
||||
follows = fetch_instance_follows(token, host, domain)
|
||||
time.sleep(API_CALL_DELAY)
|
||||
if follows > 0:
|
||||
log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
|
||||
if severity > max_followed_severity:
|
||||
|
@ -465,6 +496,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
|||
|
||||
# Is the severity changing?
|
||||
if 'severity' in change_needed:
|
||||
log.debug("Severity change requested, checking...")
|
||||
if newseverity > oldblock.severity:
|
||||
# Confirm if we really want to change the severity
|
||||
# If we still have followers of the remote domain, we may not
|
||||
# want to go all the way to full suspend, depending on the configuration
|
||||
|
@ -484,7 +517,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
|||
if not dryrun:
|
||||
update_known_block(token, host, blockdata)
|
||||
# add a pause here so we don't melt the instance
|
||||
time.sleep(1)
|
||||
time.sleep(API_CALL_DELAY)
|
||||
else:
|
||||
log.info("Dry run selected. Not applying changes.")
|
||||
|
||||
|
@ -495,24 +528,14 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
|||
else:
|
||||
# This is a new block for the target instance, so we
|
||||
# need to add a block rather than update an existing one
|
||||
# blockdata = {
|
||||
# 'domain': newblock.domain,
|
||||
# # Default to Silence if nothing is specified
|
||||
# 'severity': newblock.get('severity', 'silence'),
|
||||
# 'public_comment': newblock.get('public_comment', ''),
|
||||
# 'private_comment': newblock.get('private_comment', ''),
|
||||
# 'reject_media': newblock.get('reject_media', False),
|
||||
# 'reject_reports': newblock.get('reject_reports', False),
|
||||
# 'obfuscate': newblock.get('obfuscate', False),
|
||||
# }
|
||||
log.info(f"Adding new block: {newblock}...")
|
||||
|
||||
# Make sure the new block doesn't clobber a domain with followers
|
||||
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
|
||||
log.info(f"Adding new block: {newblock}...")
|
||||
if not dryrun:
|
||||
add_block(token, host, newblock)
|
||||
# add a pause here so we don't melt the instance
|
||||
time.sleep(1)
|
||||
time.sleep(API_CALL_DELAY)
|
||||
else:
|
||||
log.info("Dry run selected. Not adding block.")
|
||||
|
||||
|
|
|
@ -133,11 +133,23 @@ class DomainBlock(object):
|
|||
self.reject_reports = reject_reports
|
||||
self.obfuscate = obfuscate
|
||||
self.id = id
|
||||
|
||||
if isinstance(severity, BlockSeverity):
|
||||
self.severity = severity
|
||||
|
||||
@property
|
||||
def severity(self):
|
||||
return self._severity
|
||||
|
||||
@severity.setter
|
||||
def severity(self, sev):
|
||||
if isinstance(sev, BlockSeverity):
|
||||
self._severity = sev
|
||||
else:
|
||||
self.severity = BlockSeverity(severity)
|
||||
self._severity = BlockSeverity(sev)
|
||||
|
||||
# Suspend implies reject_media,reject_reports == True
|
||||
if self._severity.level == SeverityLevel.SUSPEND:
|
||||
self.reject_media = True
|
||||
self.reject_reports = True
|
||||
|
||||
def _asdict(self):
|
||||
"""Return a dict version of this object
|
||||
|
|
|
@ -72,3 +72,12 @@ def test_compare_diff_sevs_2():
|
|||
b = DomainBlock('example1.org', 'noop')
|
||||
|
||||
assert a != b
|
||||
|
||||
def test_suspend_rejects():
    """A suspend should reject_media and reject_reports
    """
    block = DomainBlock('example.org', 'suspend')

    # A 'suspend' severity implies both rejection flags are forced on.
    assert block.severity.level == SeverityLevel.SUSPEND
    assert block.reject_media == True
    assert block.reject_reports == True
|
|
@ -231,11 +231,11 @@ def test_mergeplan_same_min_bools_true():
|
|||
assert r.obfuscate == True
|
||||
|
||||
def test_mergeplan_max_bools():
|
||||
a = DomainBlock('example.org', 'suspend', '', '', True, False, True)
|
||||
a = DomainBlock('example.org', 'suspend', '', '', True, True, True)
|
||||
b = DomainBlock('example.org', 'noop', '', '', False, False, False)
|
||||
|
||||
r = apply_mergeplan(a, b, 'max')
|
||||
|
||||
assert r.reject_media == True
|
||||
assert r.reject_reports == False
|
||||
assert r.reject_reports == True
|
||||
assert r.obfuscate == True
|
Loading…
Reference in New Issue