From 327a44d90702fa699168d1f30fb68fa28a4ac5b7 Mon Sep 17 00:00:00 2001
From: Justin Warren
Date: Thu, 12 Jan 2023 09:09:06 +1100
Subject: [PATCH] Removed redundant global vars. Refactored fetch from URLs and
 instances. Changed API delay to be in calls per hour. Improved
 check_followed_severity() behaviour. Updated sample config to include new
 format. Added support for per-instance-source import_fields. BlockSeverity of
 'suspend' implies reject_media and reject_reports.

---
 etc/sample.fediblockhole.conf.toml |  10 +-
 src/fediblockhole/__init__.py      | 155 +++++++++++++++++------------
 src/fediblockhole/const.py         |  18 +++-
 tests/test_domainblock.py          |  11 +-
 tests/test_mergeplan.py            |   4 +-
 5 files changed, 124 insertions(+), 74 deletions(-)

diff --git a/etc/sample.fediblockhole.conf.toml b/etc/sample.fediblockhole.conf.toml
index d39c999..637dde2 100644
--- a/etc/sample.fediblockhole.conf.toml
+++ b/etc/sample.fediblockhole.conf.toml
@@ -3,6 +3,8 @@
 # Otherwise, `token` is a Bearer token authorised to read domain_blocks.
 # If `admin` = True, use the more detailed admin API, which requires a token with a
 # higher level of authorization.
+# If `import_fields` are provided, only import these fields from the instance.
+# Overrides the global `import_fields` setting.
 blocklist_instance_sources = [
   # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
   # { domain = 'jorts.horse', token = '' }, # user accessible block list
@@ -10,9 +12,13 @@ blocklist_instance_sources = [
 ]
 
 # List of URLs to read csv blocklists from
+# Format tells the parser which format to use when parsing the blocklist
+# max_severity tells the parser to override any severities that are higher than this value
+# import_fields tells the parser to only import that set of fields from a specific source
 blocklist_url_sources = [
-  # 'file:///etc/fediblockhole/blocklist-01.csv',
-  'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv',
+  # { url = 'file:///home/daedalus/src/fediblockhole/samples/demo-blocklist-01.csv', format = 'csv' },
+  { url = 'https://raw.githubusercontent.com/eigenmagic/fediblockhole/main/samples/demo-blocklist-01.csv', format = 'csv' },
+
 ]
 
 # List of instances to write blocklist to
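The per-source `import_fields` override above is easiest to see with a small Python sketch. The source entry and URL below are hypothetical, and the combination simply mirrors the `IMPORT_FIELDS + source_import_fields` step in fetch_from_urls() further down: the mandatory fields are always kept, and the per-source fields are added on top.

    # Sketch only: a hypothetical URL-source entry as it would look after parsing the TOML above.
    IMPORT_FIELDS = ['domain', 'severity']

    item = {
        'url': 'https://example.org/blocklist.csv',           # hypothetical URL
        'format': 'csv',
        'import_fields': ['reject_media', 'reject_reports'],
    }

    import_fields = IMPORT_FIELDS
    source_import_fields = item.get('import_fields', None)
    if source_import_fields:
        # The defaults are always imported; per-source fields extend them.
        import_fields = IMPORT_FIELDS + source_import_fields

    # import_fields is now ['domain', 'severity', 'reject_media', 'reject_reports']
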
diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py
index 5776b2c..a98a09a 100755
--- a/src/fediblockhole/__init__.py
+++ b/src/fediblockhole/__init__.py
@@ -25,29 +25,17 @@ log = logging.getLogger('fediblockhole')
 # Max size of a URL-fetched blocklist
 URL_BLOCKLIST_MAXSIZE = 1024 ** 3
 
-# The relative severity levels of blocks
-SEVERITY = {
-    'noop': 0,
-    'silence': 1,
-    'suspend': 2,
-}
-
-# Default for 'reject_media' setting for each severity level
-REJECT_MEDIA_DEFAULT = {
-    'noop': False,
-    'silence': True,
-    'suspend': True,
-}
-
-# Default for 'reject_reports' setting for each severity level
-REJECT_REPORTS_DEFAULT = {
-    'noop': False,
-    'silence': True,
-    'suspend': True,
-}
-
 # Wait at most this long for a remote server to respond
-REQUEST_TIMEOUT=30
+REQUEST_TIMEOUT = 30
+
+# Time to wait between instance API calls so we don't melt them
+API_CALL_DELAY = 3600 / 300  # 300 API calls per hour
+
+# We always import the domain and the severity
+IMPORT_FIELDS = ['domain', 'severity']
+
+# We always export the domain and the severity
+EXPORT_FIELDS = ['domain', 'severity']
 
 def sync_blocklists(conf: dict):
     """Sync instance blocklists from remote sources.
@@ -58,40 +46,25 @@ def sync_blocklists(conf: dict):
     # We will merge these later using a merge algorithm we choose.
 
     # Always import these fields
-    import_fields = ['domain', 'severity']
+    import_fields = IMPORT_FIELDS.copy()
     # Add extra import fields if defined in config
     import_fields.extend(conf.import_fields)
 
     # Always export these fields
-    export_fields = ['domain', 'severity']
+    export_fields = EXPORT_FIELDS.copy()
     # Add extra export fields if defined in config
     export_fields.extend(conf.export_fields)
 
     blocklists = {}
     # Fetch blocklists from URLs
     if not conf.no_fetch_url:
-        log.info("Fetching domain blocks from URLs...")
-        for listurl in conf.blocklist_url_sources:
-            url = listurl['url']
-            max_severity = listurl.get('max_severity', 'suspend')
-            listformat = listurl.get('format', 'csv')
-            with urlr.urlopen(url) as fp:
-                rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
-                blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
-
-            if conf.save_intermediate:
-                save_intermediate_blocklist(blocklists[url], url, conf.savedir, export_fields)
+        blocklists = fetch_from_urls(blocklists, conf.blocklist_url_sources,
+            import_fields, conf.save_intermediate, conf.savedir, export_fields)
 
     # Fetch blocklists from remote instances
     if not conf.no_fetch_instance:
-        log.info("Fetching domain blocks from instances...")
-        for blocklist_src in conf.blocklist_instance_sources:
-            domain = blocklist_src['domain']
-            admin = blocklist_src.get('admin', False)
-            token = blocklist_src.get('token', None)
-            blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
-            if conf.save_intermediate:
-                save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
+        blocklists = fetch_from_instances(blocklists, conf.blocklist_instance_sources,
+            import_fields, conf.save_intermediate, conf.savedir, export_fields)
 
     # Merge blocklists into an update dict
     merged = merge_blocklists(blocklists, conf.mergeplan)
@@ -108,9 +81,64 @@ def sync_blocklists(conf: dict):
         max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
         push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity)
 
+def fetch_from_urls(blocklists: dict, url_sources: dict,
+                    import_fields: list=IMPORT_FIELDS,
+                    save_intermediate: bool=False,
+                    savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
+    """Fetch blocklists from URL sources
+    @param blocklists: A dict of existing blocklists, keyed by source
+    @param url_sources: A list of configuration dicts for URL sources
+    @returns: A dict of blocklists, same as input, but (possibly) modified
+    """
+    log.info("Fetching domain blocks from URLs...")
+
+    for item in url_sources:
+        url = item['url']
+        # If import fields are provided, they override the global ones passed in
+        source_import_fields = item.get('import_fields', None)
+        if source_import_fields:
+            # Ensure we always use the default fields
+            import_fields = IMPORT_FIELDS + source_import_fields
+
+        max_severity = item.get('max_severity', 'suspend')
+        listformat = item.get('format', 'csv')
+        with urlr.urlopen(url) as fp:
+            rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
+            blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity)
+
+        if save_intermediate:
+            save_intermediate_blocklist(blocklists[url], url, savedir, export_fields)
+
+    return blocklists
+
+def fetch_from_instances(blocklists: dict, sources: dict,
+                         import_fields: list=IMPORT_FIELDS,
+                         save_intermediate: bool=False,
+                         savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict:
+    """Fetch blocklists from other instances
+    @param blocklists: A dict of existing blocklists, keyed by source
+    @param sources: A list of configuration dicts for instance sources
+    @returns: A dict of blocklists, same as input, but (possibly) modified
+    """
+    log.info("Fetching domain blocks from instances...")
+    for item in sources:
+        domain = item['domain']
+        admin = item.get('admin', False)
+        token = item.get('token', None)
+        # If import fields are provided, they override the global ones passed in
+        source_import_fields = item.get('import_fields', None)
+        if source_import_fields:
+            # Ensure we always use the default fields
+            import_fields = IMPORT_FIELDS + source_import_fields
+
+        # Add the blocklist with the domain as the source key
+        blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
+        if save_intermediate:
+            save_intermediate_blocklist(blocklists[domain], domain, savedir, export_fields)
+    return blocklists
+
 def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
     """Merge fetched remote blocklists into a bulk update
-
     @param mergeplan: An optional method of merging overlapping block definitions
         'max' (the default) uses the highest severity block found
         'min' uses the lowest severity block found
@@ -367,13 +395,16 @@ def check_followed_severity(host: str, token: str, domain: str,
         max_followed_severity: BlockSeverity=BlockSeverity('silence')):
     """Check an instance to see if it has followers of a to-be-blocked instance"""
 
+    log.debug("Checking followed severity...")
     # Return straight away if we're not increasing the severity
     if severity <= max_followed_severity:
         return severity
 
     # If the instance has accounts that follow people on the to-be-blocked domain,
     # limit the maximum severity to the configured `max_followed_severity`.
+    log.debug("checking for instance follows...")
     follows = fetch_instance_follows(token, host, domain)
+    time.sleep(API_CALL_DELAY)
     if follows > 0:
         log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
         if severity > max_followed_severity:
@@ -465,15 +496,17 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
 
             # Is the severity changing?
             if 'severity' in change_needed:
-                # Confirm if we really want to change the severity
-                # If we still have followers of the remote domain, we may not
-                # want to go all the way to full suspend, depending on the configuration
-                newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
-                if newseverity != oldblock.severity:
-                    newblock.severity = newseverity
-                else:
-                    log.info("Keeping severity of block the same to avoid disrupting followers.")
-                    change_needed.remove('severity')
+                log.debug("Severity change requested, checking...")
+                if newblock.severity > oldblock.severity:
+                    # Confirm if we really want to change the severity
+                    # If we still have followers of the remote domain, we may not
+                    # want to go all the way to full suspend, depending on the configuration
+                    newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity)
+                    if newseverity != oldblock.severity:
+                        newblock.severity = newseverity
+                    else:
+                        log.info("Keeping severity of block the same to avoid disrupting followers.")
+                        change_needed.remove('severity')
 
             if change_needed:
                 log.info(f"Change detected. Need to update {change_needed} for domain block for {oldblock.domain}")
@@ -484,7 +517,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
                 if not dryrun:
                     update_known_block(token, host, blockdata)
                     # add a pause here so we don't melt the instance
-                    time.sleep(1)
+                    time.sleep(API_CALL_DELAY)
                 else:
                     log.info("Dry run selected. Not applying changes.")
 
@@ -495,24 +528,14 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
         else:
             # This is a new block for the target instance, so we
             # need to add a block rather than update an existing one
-            # blockdata = {
-            #     'domain': newblock.domain,
-            #     # Default to Silence if nothing is specified
-            #     'severity': newblock.get('severity', 'silence'),
-            #     'public_comment': newblock.get('public_comment', ''),
-            #     'private_comment': newblock.get('private_comment', ''),
-            #     'reject_media': newblock.get('reject_media', False),
-            #     'reject_reports': newblock.get('reject_reports', False),
-            #     'obfuscate': newblock.get('obfuscate', False),
-            # }
+            log.info(f"Adding new block: {newblock}...")
 
             # Make sure the new block doesn't clobber a domain with followers
             newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity)
 
-            log.info(f"Adding new block: {newblock}...")
             if not dryrun:
                 add_block(token, host, newblock)
                 # add a pause here so we don't melt the instance
-                time.sleep(1)
+                time.sleep(API_CALL_DELAY)
             else:
                 log.info("Dry run selected. Not adding block.")
diff --git a/src/fediblockhole/const.py b/src/fediblockhole/const.py
index 58de5f6..4fbdc43 100644
--- a/src/fediblockhole/const.py
+++ b/src/fediblockhole/const.py
@@ -133,11 +133,23 @@ class DomainBlock(object):
         self.reject_reports = reject_reports
         self.obfuscate = obfuscate
         self.id = id
+        self.severity = severity
 
-        if isinstance(severity, BlockSeverity):
-            self.severity = severity
+    @property
+    def severity(self):
+        return self._severity
+
+    @severity.setter
+    def severity(self, sev):
+        if isinstance(sev, BlockSeverity):
+            self._severity = sev
         else:
-            self.severity = BlockSeverity(severity)
+            self._severity = BlockSeverity(sev)
+
+        # Suspend implies reject_media,reject_reports == True
+        if self._severity.level == SeverityLevel.SUSPEND:
+            self.reject_media = True
+            self.reject_reports = True
 
     def _asdict(self):
         """Return a dict version of this object
diff --git a/tests/test_domainblock.py b/tests/test_domainblock.py
index 6c37fdd..783fcd8 100644
--- a/tests/test_domainblock.py
+++ b/tests/test_domainblock.py
@@ -71,4 +71,13 @@ def test_compare_diff_sevs_2():
     a = DomainBlock('example1.org', 'suspend')
     b = DomainBlock('example1.org', 'noop')
 
-    assert a != b
\ No newline at end of file
+    assert a != b
+
+def test_suspend_rejects():
+    """A suspend should reject_media and reject_reports
+    """
+    a = DomainBlock('example.org', 'suspend')
+
+    assert a.severity.level == SeverityLevel.SUSPEND
+    assert a.reject_media == True
+    assert a.reject_reports == True
\ No newline at end of file
diff --git a/tests/test_mergeplan.py b/tests/test_mergeplan.py
index b8454ef..55f3914 100644
--- a/tests/test_mergeplan.py
+++ b/tests/test_mergeplan.py
@@ -231,11 +231,11 @@ def test_mergeplan_same_min_bools_true():
     assert r.obfuscate == True
 
 def test_mergeplan_max_bools():
-    a = DomainBlock('example.org', 'suspend', '', '', True, False, True)
+    a = DomainBlock('example.org', 'suspend', '', '', True, True, True)
     b = DomainBlock('example.org', 'noop', '', '', False, False, False)
 
     r = apply_mergeplan(a, b, 'max')
 
     assert r.reject_media == True
-    assert r.reject_reports == False
+    assert r.reject_reports == True
     assert r.obfuscate == True
\ No newline at end of file
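
Two behaviours in this patch are easy to misread from the diff alone, so here is a minimal sketch, not part of the commit and assuming the same fediblockhole.const imports the tests use: a severity of 'suspend' now forces both reject flags on via the new severity setter, and the API delay is expressed as calls per hour rather than a fixed one-second pause.

    from fediblockhole.const import DomainBlock, SeverityLevel

    # Suspend implies both reject flags, even if the caller never set them.
    block = DomainBlock('example.org', 'suspend')
    assert block.severity.level == SeverityLevel.SUSPEND
    assert block.reject_media and block.reject_reports

    # 300 calls per hour works out to one call every 12 seconds.
    API_CALL_DELAY = 3600 / 300
    assert API_CALL_DELAY == 12.0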