Removed redundant global vars.
Refactored fetch from URLs and instances. Change API delay to be in calls per hour. Improved check_followed_severity() behaviour. Updated sample config to include new format. Added support for per-instance-source import_fields. BlockSeverity of 'suspend' implies reject_media and reject_reports.
This commit is contained in:
parent
66f0373cbe
commit
327a44d907
|
@ -3,6 +3,8 @@
|
||||||
# Otherwise, `token` is a Bearer token authorised to read domain_blocks.
|
# Otherwise, `token` is a Bearer token authorised to read domain_blocks.
|
||||||
# If `admin` = True, use the more detailed admin API, which requires a token with a
|
# If `admin` = True, use the more detailed admin API, which requires a token with a
|
||||||
# higher level of authorization.
|
# higher level of authorization.
|
||||||
|
# If `import_fields` are provided, only import these fields from the instance.
|
||||||
|
# Overrides the global `import_fields` setting.
|
||||||
blocklist_instance_sources = [
|
blocklist_instance_sources = [
|
||||||
# { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
|
# { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
|
||||||
# { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
|
# { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
|
||||||
|
@ -10,9 +12,13 @@ blocklist_instance_sources = [
|
||||||
]
|
]
|
||||||
|
|
||||||
# List of URLs to read csv blocklists from
|
# List of URLs to read csv blocklists from
|
||||||
|
# Format tells the parser which format to use when parsing the blocklist
|
||||||
|
# max_severity tells the parser to override any severities that are higher than this value
|
||||||
|
# import_fields tells the parser to only import that set of fields from a specific source
|
||||||
blocklist_url_sources = [
|
blocklist_url_sources = [
|
||||||
# 'file:///etc/fediblockhole/blocklist-01.csv',
|
# { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
|
||||||
'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv',
|
{ url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# List of instances to write blocklist to
|
# List of instances to write blocklist to
|
||||||
|
|
|
@ -25,29 +25,17 @@ log = logging.getLogger('fediblockhole')
|
||||||
# Max size of a URL-fetched blocklist
|
# Max size of a URL-fetched blocklist
|
||||||
URL_BLOCKLIST_MAXSIZE = 1024 ** 3
|
URL_BLOCKLIST_MAXSIZE = 1024 ** 3
|
||||||
|
|
||||||
# The relative severity levels of blocks
|
|
||||||
SEVERITY = {
|
|
||||||
'noop': 0,
|
|
||||||
'silence': 1,
|
|
||||||
'suspend': 2,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Default for 'reject_media' setting for each severity level
|
|
||||||
REJECT_MEDIA_DEFAULT = {
|
|
||||||
'noop': False,
|
|
||||||
'silence': True,
|
|
||||||
'suspend': True,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Default for 'reject_reports' setting for each severity level
|
|
||||||
REJECT_REPORTS_DEFAULT = {
|
|
||||||
'noop': False,
|
|
||||||
'silence': True,
|
|
||||||
'suspend': True,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Wait at most this long for a remote server to respond
|
# Wait at most this long for a remote server to respond
|
||||||
REQUEST_TIMEOUT=30
|
REQUEST_TIMEOUT = 30
|
||||||
|
|
||||||
|
# Time to wait between instance API calls so we don't melt them
|
||||||
|
API_CALL_DELAY = 3600 / 300 # 300 API calls per hour
|
||||||
|
|
||||||
|
# We always import the domain and the severity
|
||||||
|
IMPORT_FIELDS = ['domain', 'severity']
|
||||||
|
|
||||||
|
# We always export the domain and the severity
|
||||||
|
EXPORT_FIELDS = ['domain', 'severity']
|
||||||
|
|
||||||
def sync_blocklists(conf: dict):
|
def sync_blocklists(conf: dict):
|
||||||
"""Sync instance blocklists from remote sources.
|
"""Sync instance blocklists from remote sources.
|
||||||
|
@ -58,40 +46,25 @@ def sync_blocklists(conf: dict):
|
||||||
# We will merge these later using a merge algorithm we choose.
|
# We will merge these later using a merge algorithm we choose.
|
||||||
|
|
||||||
# Always import these fields
|
# Always import these fields
|
||||||
import_fields = ['domain', 'severity']
|
import_fields = IMPORT_FIELDS
|
||||||
# Add extra import fields if defined in config
|
# Add extra import fields if defined in config
|
||||||
import_fields.extend(conf.import_fields)
|
import_fields.extend(conf.import_fields)
|
||||||
|
|
||||||
# Always export these fields
|
# Always export these fields
|
||||||
export_fields = ['domain', 'severity']
|
export_fields = EXPORT_FIELDS
|
||||||
# Add extra export fields if defined in config
|
# Add extra export fields if defined in config
|
||||||
export_fields.extend(conf.export_fields)
|
export_fields.extend(conf.export_fields)
|
||||||
|
|
||||||
blocklists = {}
|
blocklists = {}
|
||||||
# Fetch blocklists from URLs
|
# Fetch blocklists from URLs
|
||||||
if not conf.no_fetch_url:
|
if not conf.no_fetch_url:
|
||||||
log.info("Fetching domain blocks from URLs...")
|
blocklists = fetch_from_urls(blocklists, conf.blocklist_url_sources,
|
||||||
for listurl in conf.blocklist_url_sources:
|
import_fields, conf.save_intermediate, conf.savedir, export_fields)
|
||||||
url = listurl['url']
|
|
||||||
max_severity = listurl.get('max_severity', 'suspend')
|
|
||||||
listformat = listurl.get('format', 'csv')
|
|
||||||
with urlr.urlopen(url) as fp:
|
|
||||||
rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
|
|
||||||
blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
|
|
||||||
|
|
||||||
if conf.save_intermediate:
|
|
||||||
save_intermediate_blocklist(blocklists[url], url, conf.savedir, export_fields)
|
|
||||||
|
|
||||||
# Fetch blocklists from remote instances
|
# Fetch blocklists from remote instances
|
||||||
if not conf.no_fetch_instance:
|
if not conf.no_fetch_instance:
|
||||||
log.info("Fetching domain blocks from instances...")
|
blocklists = fetch_from_instances(blocklists, conf.blocklist_instance_sources,
|
||||||
for blocklist_src in conf.blocklist_instance_sources:
|
import_fields, conf.save_intermediate, conf.savedir, export_fields)
|
||||||
domain = blocklist_src['domain']
|
|
||||||
admin = blocklist_src.get('admin', False)
|
|
||||||
token = blocklist_src.get('token', None)
|
|
||||||
blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
|
|
||||||
if conf.save_intermediate:
|
|
||||||
save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
|
|
||||||
|
|
||||||
# Merge blocklists into an update dict
|
# Merge blocklists into an update dict
|
||||||
merged = merge_blocklists(blocklists, conf.mergeplan)
|
merged = merge_blocklists(blocklists, conf.mergeplan)
|
||||||
|
@ -108,9 +81,64 @@ def sync_blocklists(conf: dict):
|
||||||
max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
|
max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
|
||||||
push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)
|
push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)
|
||||||
|
|
||||||
|
def fetch_from_urls(blocklists: dict, url_sources: dict,
|
||||||
|
import_fields: list=IMPORT_FIELDS,
|
||||||
|
save_intermediate: bool=False,
|
||||||
|
savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
|
||||||
|
"""Fetch blocklists from URL sources
|
||||||
|
@param blocklists: A dict of existing blocklists, keyed by source
|
||||||
|
@param url_sources: A dict of configuration info for url sources
|
||||||
|
@returns: A dict of blocklists, same as input, but (possibly) modified
|
||||||
|
"""
|
||||||
|
log.info("Fetching domain blocks from URLs...")
|
||||||
|
|
||||||
|
for item in url_sources:
|
||||||
|
url = item['url']
|
||||||
|
# If import fields are provided, they override the global ones passed in
|
||||||
|
source_import_fields = item.get('import_fields', None)
|
||||||
|
if source_import_fields:
|
||||||
|
# Ensure we always use the default fields
|
||||||
|
import_fields = IMPORT_FIELDS.extend(source_import_fields)
|
||||||
|
|
||||||
|
max_severity = item.get('max_severity', 'suspend')
|
||||||
|
listformat = item.get('format', 'csv')
|
||||||
|
with urlr.urlopen(url) as fp:
|
||||||
|
rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
|
||||||
|
blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
|
||||||
|
|
||||||
|
if save_intermediate:
|
||||||
|
save_intermediate_blocklist(blocklists[url], url, savedir, export_fields)
|
||||||
|
|
||||||
|
return blocklists
|
||||||
|
|
||||||
|
def fetch_from_instances(blocklists: dict, sources: dict,
|
||||||
|
import_fields: list=IMPORT_FIELDS,
|
||||||
|
save_intermediate: bool=False,
|
||||||
|
savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
|
||||||
|
"""Fetch blocklists from other instances
|
||||||
|
@param blocklists: A dict of existing blocklists, keyed by source
|
||||||
|
@param sources: A dict of configuration info for instance sources
|
||||||
|
@returns: A dict of blocklists, same as input, but (possibly) modified
|
||||||
|
"""
|
||||||
|
log.info("Fetching domain blocks from instances...")
|
||||||
|
for item in sources:
|
||||||
|
domain = item['domain']
|
||||||
|
admin = item.get('admin', False)
|
||||||
|
token = item.get('token', None)
|
||||||
|
# If import fields are provided, they override the global ones passed in
|
||||||
|
source_import_fields = item.get('import_fields', None)
|
||||||
|
if source_import_fields:
|
||||||
|
# Ensure we always use the default fields
|
||||||
|
import_fields = IMPORT_FIELDS.extend(source_import_fields)
|
||||||
|
|
||||||
|
# Add the blocklist with the domain as the source key
|
||||||
|
blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
|
||||||
|
if save_intermediate:
|
||||||
|
save_intermediate_blocklist(blocklists[domain], domain, savedir, export_fields)
|
||||||
|
return blocklists
|
||||||
|
|
||||||
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
||||||
"""Merge fetched remote blocklists into a bulk update
|
"""Merge fetched remote blocklists into a bulk update
|
||||||
|
|
||||||
@param mergeplan: An optional method of merging overlapping block definitions
|
@param mergeplan: An optional method of merging overlapping block definitions
|
||||||
'max' (the default) uses the highest severity block found
|
'max' (the default) uses the highest severity block found
|
||||||
'min' uses the lowest severity block found
|
'min' uses the lowest severity block found
|
||||||
|
@ -367,13 +395,16 @@ def check_followed_severity(host: str, token: str, domain: str,
|
||||||
max_followed_severity: BlockSeverity=BlockSeverity('silence')):
|
max_followed_severity: BlockSeverity=BlockSeverity('silence')):
|
||||||
"""Check an instance to see if it has followers of a to-be-blocked instance"""
|
"""Check an instance to see if it has followers of a to-be-blocked instance"""
|
||||||
|
|
||||||
|
log.debug("Checking followed severity...")
|
||||||
# Return straight away if we're not increasing the severity
|
# Return straight away if we're not increasing the severity
|
||||||
if severity <= max_followed_severity:
|
if severity <= max_followed_severity:
|
||||||
return severity
|
return severity
|
||||||
|
|
||||||
# If the instance has accounts that follow people on the to-be-blocked domain,
|
# If the instance has accounts that follow people on the to-be-blocked domain,
|
||||||
# limit the maximum severity to the configured `max_followed_severity`.
|
# limit the maximum severity to the configured `max_followed_severity`.
|
||||||
|
log.debug("checking for instance follows...")
|
||||||
follows = fetch_instance_follows(token, host, domain)
|
follows = fetch_instance_follows(token, host, domain)
|
||||||
|
time.sleep(API_CALL_DELAY)
|
||||||
if follows > 0:
|
if follows > 0:
|
||||||
log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
|
log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
|
||||||
if severity > max_followed_severity:
|
if severity > max_followed_severity:
|
||||||
|
@ -465,15 +496,17 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
||||||
|
|
||||||
# Is the severity changing?
|
# Is the severity changing?
|
||||||
if 'severity' in change_needed:
|
if 'severity' in change_needed:
|
||||||
# Confirm if we really want to change the severity
|
log.debug("Severity change requested, checking...")
|
||||||
# If we still have followers of the remote domain, we may not
|
if newseverity > oldblock.severity:
|
||||||
# want to go all the way to full suspend, depending on the configuration
|
# Confirm if we really want to change the severity
|
||||||
newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
|
# If we still have followers of the remote domain, we may not
|
||||||
if newseverity != oldblock.severity:
|
# want to go all the way to full suspend, depending on the configuration
|
||||||
newblock.severity = newseverity
|
newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
|
||||||
else:
|
if newseverity != oldblock.severity:
|
||||||
log.info("Keeping severity of block the same to avoid disrupting followers.")
|
newblock.severity = newseverity
|
||||||
change_needed.remove('severity')
|
else:
|
||||||
|
log.info("Keeping severity of block the same to avoid disrupting followers.")
|
||||||
|
change_needed.remove('severity')
|
||||||
|
|
||||||
if change_needed:
|
if change_needed:
|
||||||
log.info(f"Change detected. Need to update {change_needed} for domain block for {oldblock.domain}")
|
log.info(f"Change detected. Need to update {change_needed} for domain block for {oldblock.domain}")
|
||||||
|
@ -484,7 +517,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
||||||
if not dryrun:
|
if not dryrun:
|
||||||
update_known_block(token, host, blockdata)
|
update_known_block(token, host, blockdata)
|
||||||
# add a pause here so we don't melt the instance
|
# add a pause here so we don't melt the instance
|
||||||
time.sleep(1)
|
time.sleep(API_CALL_DELAY)
|
||||||
else:
|
else:
|
||||||
log.info("Dry run selected. Not applying changes.")
|
log.info("Dry run selected. Not applying changes.")
|
||||||
|
|
||||||
|
@ -495,24 +528,14 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
||||||
else:
|
else:
|
||||||
# This is a new block for the target instance, so we
|
# This is a new block for the target instance, so we
|
||||||
# need to add a block rather than update an existing one
|
# need to add a block rather than update an existing one
|
||||||
# blockdata = {
|
log.info(f"Adding new block: {newblock}...")
|
||||||
# 'domain': newblock.domain,
|
|
||||||
# # Default to Silence if nothing is specified
|
|
||||||
# 'severity': newblock.get('severity', 'silence'),
|
|
||||||
# 'public_comment': newblock.get('public_comment', ''),
|
|
||||||
# 'private_comment': newblock.get('private_comment', ''),
|
|
||||||
# 'reject_media': newblock.get('reject_media', False),
|
|
||||||
# 'reject_reports': newblock.get('reject_reports', False),
|
|
||||||
# 'obfuscate': newblock.get('obfuscate', False),
|
|
||||||
# }
|
|
||||||
|
|
||||||
# Make sure the new block doesn't clobber a domain with followers
|
# Make sure the new block doesn't clobber a domain with followers
|
||||||
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
|
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
|
||||||
log.info(f"Adding new block: {newblock}...")
|
|
||||||
if not dryrun:
|
if not dryrun:
|
||||||
add_block(token, host, newblock)
|
add_block(token, host, newblock)
|
||||||
# add a pause here so we don't melt the instance
|
# add a pause here so we don't melt the instance
|
||||||
time.sleep(1)
|
time.sleep(API_CALL_DELAY)
|
||||||
else:
|
else:
|
||||||
log.info("Dry run selected. Not adding block.")
|
log.info("Dry run selected. Not adding block.")
|
||||||
|
|
||||||
|
|
|
@ -133,11 +133,23 @@ class DomainBlock(object):
|
||||||
self.reject_reports = reject_reports
|
self.reject_reports = reject_reports
|
||||||
self.obfuscate = obfuscate
|
self.obfuscate = obfuscate
|
||||||
self.id = id
|
self.id = id
|
||||||
|
self.severity = severity
|
||||||
|
|
||||||
if isinstance(severity, BlockSeverity):
|
@property
|
||||||
self.severity = severity
|
def severity(self):
|
||||||
|
return self._severity
|
||||||
|
|
||||||
|
@severity.setter
|
||||||
|
def severity(self, sev):
|
||||||
|
if isinstance(sev, BlockSeverity):
|
||||||
|
self._severity = sev
|
||||||
else:
|
else:
|
||||||
self.severity = BlockSeverity(severity)
|
self._severity = BlockSeverity(sev)
|
||||||
|
|
||||||
|
# Suspend implies reject_media,reject_reports == True
|
||||||
|
if self._severity.level == SeverityLevel.SUSPEND:
|
||||||
|
self.reject_media = True
|
||||||
|
self.reject_reports = True
|
||||||
|
|
||||||
def _asdict(self):
|
def _asdict(self):
|
||||||
"""Return a dict version of this object
|
"""Return a dict version of this object
|
||||||
|
|
|
@ -71,4 +71,13 @@ def test_compare_diff_sevs_2():
|
||||||
a = DomainBlock('example1.org', 'suspend')
|
a = DomainBlock('example1.org', 'suspend')
|
||||||
b = DomainBlock('example1.org', 'noop')
|
b = DomainBlock('example1.org', 'noop')
|
||||||
|
|
||||||
assert a != b
|
assert a != b
|
||||||
|
|
||||||
|
def test_suspend_rejects():
|
||||||
|
"""A suspend should reject_media and reject_reports
|
||||||
|
"""
|
||||||
|
a = DomainBlock('example.org', 'suspend')
|
||||||
|
|
||||||
|
assert a.severity.level == SeverityLevel.SUSPEND
|
||||||
|
assert a.reject_media == True
|
||||||
|
assert a.reject_reports == True
|
|
@ -231,11 +231,11 @@ def test_mergeplan_same_min_bools_true():
|
||||||
assert r.obfuscate == True
|
assert r.obfuscate == True
|
||||||
|
|
||||||
def test_mergeplan_max_bools():
|
def test_mergeplan_max_bools():
|
||||||
a = DomainBlock('example.org', 'suspend', '', '', True, False, True)
|
a = DomainBlock('example.org', 'suspend', '', '', True, True, True)
|
||||||
b = DomainBlock('example.org', 'noop', '', '', False, False, False)
|
b = DomainBlock('example.org', 'noop', '', '', False, False, False)
|
||||||
|
|
||||||
r = apply_mergeplan(a, b, 'max')
|
r = apply_mergeplan(a, b, 'max')
|
||||||
|
|
||||||
assert r.reject_media == True
|
assert r.reject_media == True
|
||||||
assert r.reject_reports == False
|
assert r.reject_reports == True
|
||||||
assert r.obfuscate == True
|
assert r.obfuscate == True
|
Loading…
Reference in New Issue