From 8fc8160f829b385e4566d2e73a26a932ca7d28f6 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Tue, 20 Dec 2022 14:29:23 +1100 Subject: [PATCH] More advanced mergeplan handling. Support for 'noop' severity for removing a block. Support for mergeplan as commandline parameter. Support for mergeplan as config file option. Added --dryrun commandline option. --- bin/fediblock_sync.py | 230 +++++++++++++++++++---------- etc/sample.fediblockhole.conf.toml | 8 +- 2 files changed, 161 insertions(+), 77 deletions(-) diff --git a/bin/fediblock_sync.py b/bin/fediblock_sync.py index f7cc066..b49a327 100755 --- a/bin/fediblock_sync.py +++ b/bin/fediblock_sync.py @@ -21,6 +21,27 @@ log = logging.getLogger('fediblock_sync') CONFIGFILE = "/home/mastodon/etc/admin.conf" +# The relative severity levels of blocks +SEVERITY = { + 'noop': 0, + 'silence': 1, + 'suspend': 2, +} + +# Default for 'reject_media' setting for each severity level +REJECT_MEDIA_DEFAULT = { + 'noop': False, + 'silence': True, + 'suspend': True, +} + +# Default for 'reject_reports' setting for each severity level +REJECT_REPORTS_DEFAULT = { + 'noop': False, + 'silence': True, + 'suspend': True, +} + def sync_blocklists(conf: dict): """Sync instance blocklists from remote sources. 
@@ -39,6 +60,10 @@ def sync_blocklists(conf: dict): rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8') reader = csv.DictReader(rawdata.split('\n')) for row in reader: + # Coerce booleans from string to Python bool + for boolkey in ['reject_media', 'reject_reports', 'obfuscate']: + if boolkey in row: + row[boolkey] = str2bool(row[boolkey]) blocklists[listurl].append(row) if conf.save_intermediate: save_intermediate_blocklist(blocklists[listurl], listurl, conf.savedir) @@ -54,7 +79,7 @@ def sync_blocklists(conf: dict): save_intermediate_blocklist(blocklists[domain], domain, conf.savedir) # Merge blocklists into an update dict - merged = merge_blocklists(blocklists) + merged = merge_blocklists(blocklists, conf.mergeplan) if conf.blocklist_savefile: log.info(f"Saving merged blocklist to {conf.blocklist_savefile}") save_blocklist_to_file(merged.values(), conf.blocklist_savefile) @@ -65,7 +90,7 @@ def sync_blocklists(conf: dict): for dest in conf.blocklist_instance_destinations: domain = dest['domain'] token = dest['token'] - push_blocklist(token, domain, merged.values()) + push_blocklist(token, domain, merged.values(), conf.dryrun) def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict: """Merge fetched remote blocklists into a bulk update @@ -77,63 +102,90 @@ def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict: merged = {} for key, blist in blocklists.items(): - log.debug(f"Merging blocks from {key} ...") - for blockdef in blist: - # log.debug(f"Checking blockdef {blockdef} ...") - domain = blockdef['domain'] + log.debug(f"processing key {key} blist...") + for newblock in blist: + domain = newblock['domain'] if domain in merged: - blockdata = merged[domain] - - # If the public or private comment is different, - # append it to the existing comment, joined with a newline - if blockdef['public_comment'] != blockdata['public_comment'] and blockdata['public_comment'] != '': - blockdata['public_comment'] = 
'\n'.join([blockdef['public_comment'], blockdata['public_comment']]) - - if blockdef['private_comment'] != blockdata['private_comment'] and blockdata['private_comment'] != '': - blockdata['private_comment'] = '\n'.join([blockdef['private_comment'], blockdata['private_comment']]) - - # How do we override an earlier block definition? - if mergeplan in ['max', None]: - # Use the highest block level found (the default) - if blockdef['severity'] == 'suspend': - blockdata['severity'] = 'suspend' - - if blockdef['reject_media'] == True: - blockdata['reject_media'] = True - - if blockdef['reject_reports'] == True: - blockdata['reject_reports'] = True - - elif mergeplan in ['min']: - # Use the lowest block level found - if blockdef['severity'] == 'silence': - blockdata['severity'] = 'silence' - - if blockdef['reject_media'] == False: - blockdata['reject_media'] = False - - if blockdef['reject_reports'] == False: - blockdata['reject_reports'] = False - - else: - raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.") - + log.debug(f"Overlapping block for domain {domain}. 
Merging...") + blockdata = apply_mergeplan(merged[domain], newblock, mergeplan) else: # New block blockdata = { - 'domain': blockdef['domain'], + 'domain': newblock['domain'], # Default to Silence if nothing is specified - 'severity': blockdef.get('severity', 'silence'), - 'public_comment': blockdef['public_comment'], - 'private_comment': blockdef['private_comment'], - 'reject_media': blockdef.get('reject_media', False), - 'reject_reports': blockdef.get('reject_reports', False), - 'obfuscate': blockdef.get('obfuscate', False), + 'severity': newblock.get('severity', 'silence'), + 'public_comment': newblock.get('public_comment', ''), + 'private_comment': newblock.get('private_comment', ''), + 'obfuscate': newblock.get('obfuscate', True), # default obfuscate to True } + sev = blockdata['severity'] # convenience variable + blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[sev]) + blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[sev]) + # end if + log.debug(f"blockdata is: {blockdata}") merged[domain] = blockdata - + # end for return merged +def apply_mergeplan(oldblock: dict, newblock: dict, mergeplan: str='max') -> dict: + """Use a mergeplan to decide how to merge two overlapping block definitions + + @param oldblock: The existing block definition. + @param newblock: The new block definition we want to merge in. + @param mergeplan: How to merge. Choices are 'max', the default, and 'min'. 
+ """ + # Default to the existing block definition + blockdata = oldblock.copy() + + # If the public or private comment is different, + # append it to the existing comment, joined with a newline + # unless the comment is None or an empty string + for key in ['public_comment', 'private_comment']: + key = 'public_comment' # convenience variable + if oldblock[key] != newblock[key] and newblock[key] not in ['', None]: + blockdata[key] = '\n'.join([oldblock[key], newblock[key]]) + + # How do we override an earlier block definition? + if mergeplan in ['max', None]: + # Use the highest block level found (the default) + log.debug(f"Using 'max' mergeplan.") + + if SEVERITY[newblock['severity']] > SEVERITY[oldblock['severity']]: + log.debug(f"New block severity is higher. Using that.") + blockdata['severity'] = newblock['severity'] + + # If obfuscate is set and is True for the domain in + # any blocklist then obfuscate is set to True. + if newblock.get('obfuscate', False): + blockdata['obfuscate'] = True + + elif mergeplan in ['min']: + # Use the lowest block level found + log.debug(f"Using 'min' mergeplan.") + + if SEVERITY[newblock['severity']] < SEVERITY[oldblock['severity']]: + blockdata['severity'] = newblock['severity'] + + # If obfuscate is set and is False for the domain in + # any blocklist then obfuscate is set to False. 
+ if not newblock.get('obfuscate', True): + blockdata['obfuscate'] = False + + else: + raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.") + + log.debug(f"Block severity set to {blockdata['severity']}") + # Use the severity level to set rejections, if not defined in newblock + # If severity level is 'suspend', it doesn't matter what the setting is for + # 'reject_media' or 'reject_reports' + blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[blockdata['severity']]) + blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[blockdata['severity']]) + + log.debug(f"set reject_media to: {blockdata['reject_media']}") + log.debug(f"set reject_reports to: {blockdata['reject_reports']}") + + return blockdata + def fetch_instance_blocklist(token: str, host: str) -> list: """Fetch existing block list from server @@ -223,7 +275,7 @@ def add_block(token: str, host: str, blockdata: dict): if response.status_code != 200: raise ValueError(f"Something went wrong: {response.status_code}: {response.content}") -def push_blocklist(token: str, host: str, blocklist: list[dict]): +def push_blocklist(token: str, host: str, blocklist: list[dict], dryrun: bool=False): """Push a blocklist to a remote instance. 
Merging the blocklist with the existing list the instance has, @@ -240,14 +292,12 @@ def push_blocklist(token: str, host: str, blocklist: list[dict]): # Convert serverblocks to a dictionary keyed by domain name knownblocks = {row['domain']: row for row in serverblocks} - for row in blocklist: - # log.debug(f"Importing definition: {row}") - - if 'id' in row: del row['id'] + for newblock in blocklist: + log.debug(f"applying newblock: {newblock}") try: - blockdict = knownblocks[row['domain']] - log.debug(f"Block already exists for {row['domain']}, merging data...") + oldblock = knownblocks[newblock['domain']] + log.debug(f"Block already exists for {newblock['domain']}, merging data...") # Check if anything is actually different and needs updating change_needed = False @@ -260,38 +310,53 @@ def push_blocklist(token: str, host: str, blocklist: list[dict]): 'obfuscate', ]: try: - if blockdict[key] != knownblocks[key]: + log.debug(f"Compare {key} '{oldblock[key]}' <> '{newblock[key]}'") + oldval = oldblock[key] + newval = newblock[key] + if oldval != newval: + log.debug("Difference detected. Change needed.") change_needed = True break + except KeyError: - break + log.debug(f"KeyError comparing {key}") + continue if change_needed: - log.debug(f"Change detected. Updating domain block for {row['domain']}") - blockdict.update(row) - update_known_block(token, host, blockdict) - # add a pause here so we don't melt the instance - time.sleep(1) + log.info(f"Change detected. Updating domain block for {oldblock['domain']}") + blockdata = oldblock.copy() + blockdata.update(newblock) + if not dryrun: + update_known_block(token, host, blockdata) + # add a pause here so we don't melt the instance + time.sleep(1) + else: + log.info("Dry run selected. Not applying changes.") else: log.debug("No differences detected. 
Not updating.") + pass except KeyError: - # domain doesn't have an entry, so we need to create one + # This is a new block for the target instance, so we + # need to add a block rather than update an existing one blockdata = { - 'domain': row['domain'], + 'domain': newblock['domain'], # Default to Silence if nothing is specified - 'severity': row.get('severity', 'silence'), - 'public_comment': row['public_comment'], - 'private_comment': row['private_comment'], - 'reject_media': row.get('reject_media', False), - 'reject_reports': row.get('reject_reports', False), - 'obfuscate': row.get('obfuscate', False), + 'severity': newblock.get('severity', 'silence'), + 'public_comment': newblock.get('public_comment', ''), + 'private_comment': newblock.get('private_comment', ''), + 'reject_media': newblock.get('reject_media', False), + 'reject_reports': newblock.get('reject_reports', False), + 'obfuscate': newblock.get('obfuscate', False), } log.info(f"Adding new block for {blockdata['domain']}...") - add_block(token, host, blockdata) - # add a pause here so we don't melt the instance - time.sleep(1) + if not dryrun: + add_block(token, host, blockdata) + # add a pause here so we don't melt the instance + time.sleep(1) + else: + log.info("Dry run selected. 
Not adding block.") def load_config(configfile: str): """Augment commandline arguments with config file parameters @@ -359,6 +424,17 @@ def augment_args(args): return args +def str2bool(boolstring: str) -> bool: + """Helper function to convert boolean strings to actual Python bools + """ + boolstring = boolstring.lower() + if boolstring in ['true', 't', '1', 'y', 'yes']: + return True + elif boolstring in ['false', 'f', '0', 'n', 'no']: + return False + else: + raise ValueError(f"Cannot parse value '{boolstring}' as boolean") + if __name__ == '__main__': ap = argparse.ArgumentParser(description="Bulk blocklist tool", @@ -368,12 +444,14 @@ if __name__ == '__main__': ap.add_argument('-o', '--outfile', dest="blocklist_savefile", help="Save merged blocklist to a local file.") ap.add_argument('-S', '--save-intermediate', dest="save_intermediate", action='store_true', help="Save intermediate blocklists we fetch to local files.") ap.add_argument('-D', '--savedir', dest="savedir", help="Directory path to save intermediate lists.") - + ap.add_argument('-m', '--mergeplan', choices=['min', 'max'], default='max', help="Set mergeplan.") + ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.") ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.") ap.add_argument('--no-push-instance', dest='no_push_instance', action='store_true', help="Don't push to instances, even if configured.") ap.add_argument('--loglevel', choices=['debug', 'info', 'warning', 'error', 'critical'], help="Set log output level.") + ap.add_argument('--dryrun', action='store_true', help="Don't actually push updates, just show what would happen.") args = ap.parse_args() if args.loglevel is not None: diff --git a/etc/sample.fediblockhole.conf.toml b/etc/sample.fediblockhole.conf.toml index 973779c..5131390 100644 --- a/etc/sample.fediblockhole.conf.toml +++ 
b/etc/sample.fediblockhole.conf.toml @@ -32,4 +32,10 @@ blocklist_instance_destinations = [ # no_fetch_url = false ## Don't fetch blocklists from instances, even if they're defined above -# no_fetch_instance = false \ No newline at end of file +# no_fetch_instance = false + +## Set the mergeplan to use when dealing with overlaps between blocklists +# The default 'max' mergeplan will use the harshest severity block found for a domain. +# The 'min' mergeplan will use the lightest severity block found for a domain. +# mergeplan = 'max' +