Merge branch 'mergeplan'
commit 2fef34907f
@@ -21,6 +21,27 @@ log = logging.getLogger('fediblock_sync')
 
 CONFIGFILE = "/home/mastodon/etc/admin.conf"
 
+# The relative severity levels of blocks
+SEVERITY = {
+    'noop': 0,
+    'silence': 1,
+    'suspend': 2,
+}
+
+# Default for 'reject_media' setting for each severity level
+REJECT_MEDIA_DEFAULT = {
+    'noop': False,
+    'silence': True,
+    'suspend': True,
+}
+
+# Default for 'reject_reports' setting for each severity level
+REJECT_REPORTS_DEFAULT = {
+    'noop': False,
+    'silence': True,
+    'suspend': True,
+}
+
 def sync_blocklists(conf: dict):
     """Sync instance blocklists from remote sources.
 
@@ -39,6 +60,10 @@ def sync_blocklists(conf: dict):
             rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
             reader = csv.DictReader(rawdata.split('\n'))
             for row in reader:
+                # Coerce booleans from string to Python bool
+                for boolkey in ['reject_media', 'reject_reports', 'obfuscate']:
+                    if boolkey in row:
+                        row[boolkey] = str2bool(row[boolkey])
                 blocklists[listurl].append(row)
         if conf.save_intermediate:
             save_intermediate_blocklist(blocklists[listurl], listurl, conf.savedir)
@@ -54,7 +79,7 @@ def sync_blocklists(conf: dict):
             save_intermediate_blocklist(blocklists[domain], domain, conf.savedir)
 
     # Merge blocklists into an update dict
-    merged = merge_blocklists(blocklists)
+    merged = merge_blocklists(blocklists, conf.mergeplan)
     if conf.blocklist_savefile:
         log.info(f"Saving merged blocklist to {conf.blocklist_savefile}")
         save_blocklist_to_file(merged.values(), conf.blocklist_savefile)
@@ -65,7 +90,7 @@ def sync_blocklists(conf: dict):
     for dest in conf.blocklist_instance_destinations:
         domain = dest['domain']
         token = dest['token']
-        push_blocklist(token, domain, merged.values())
+        push_blocklist(token, domain, merged.values(), conf.dryrun)
 
 def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
     """Merge fetched remote blocklists into a bulk update
@@ -77,63 +102,90 @@ def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
     merged = {}
 
     for key, blist in blocklists.items():
-        log.debug(f"Merging blocks from {key} ...")
-        for blockdef in blist:
-            # log.debug(f"Checking blockdef {blockdef} ...")
-            domain = blockdef['domain']
+        log.debug(f"processing key {key} blist...")
+        for newblock in blist:
+            domain = newblock['domain']
             if domain in merged:
-                blockdata = merged[domain]
-
-                # If the public or private comment is different,
-                # append it to the existing comment, joined with a newline
-                if blockdef['public_comment'] != blockdata['public_comment'] and blockdata['public_comment'] != '':
-                    blockdata['public_comment'] = '\n'.join([blockdef['public_comment'], blockdata['public_comment']])
-
-                if blockdef['private_comment'] != blockdata['private_comment'] and blockdata['private_comment'] != '':
-                    blockdata['private_comment'] = '\n'.join([blockdef['private_comment'], blockdata['private_comment']])
-
-                # How do we override an earlier block definition?
-                if mergeplan in ['max', None]:
-                    # Use the highest block level found (the default)
-                    if blockdef['severity'] == 'suspend':
-                        blockdata['severity'] = 'suspend'
-
-                    if blockdef['reject_media'] == True:
-                        blockdata['reject_media'] = True
-
-                    if blockdef['reject_reports'] == True:
-                        blockdata['reject_reports'] = True
-
-                elif mergeplan in ['min']:
-                    # Use the lowest block level found
-                    if blockdef['severity'] == 'silence':
-                        blockdata['severity'] = 'silence'
-
-                    if blockdef['reject_media'] == False:
-                        blockdata['reject_media'] = False
-
-                    if blockdef['reject_reports'] == False:
-                        blockdata['reject_reports'] = False
-
-                else:
-                    raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")
-
+                log.debug(f"Overlapping block for domain {domain}. Merging...")
+                blockdata = apply_mergeplan(merged[domain], newblock, mergeplan)
             else:
                 # New block
                 blockdata = {
-                    'domain': blockdef['domain'],
+                    'domain': newblock['domain'],
                     # Default to Silence if nothing is specified
-                    'severity': blockdef.get('severity', 'silence'),
-                    'public_comment': blockdef['public_comment'],
-                    'private_comment': blockdef['private_comment'],
-                    'reject_media': blockdef.get('reject_media', False),
-                    'reject_reports': blockdef.get('reject_reports', False),
-                    'obfuscate': blockdef.get('obfuscate', False),
+                    'severity': newblock.get('severity', 'silence'),
+                    'public_comment': newblock.get('public_comment', ''),
+                    'private_comment': newblock.get('private_comment', ''),
+                    'obfuscate': newblock.get('obfuscate', True), # default obfuscate to True
                 }
+                sev = blockdata['severity'] # convenience variable
+                blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[sev])
+                blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[sev])
+            # end if
+            log.debug(f"blockdata is: {blockdata}")
             merged[domain] = blockdata
+        # end for
     return merged
 
+def apply_mergeplan(oldblock: dict, newblock: dict, mergeplan: str='max') -> dict:
+    """Use a mergeplan to decide how to merge two overlapping block definitions
+
+    @param oldblock: The exist block definition.
+    @param newblock: The new block definition we want to merge in.
+    @param mergeplan: How to merge. Choices are 'max', the default, and 'min'.
+    """
+    # Default to the existing block definition
+    blockdata = oldblock.copy()
+
+    # If the public or private comment is different,
+    # append it to the existing comment, joined with a newline
+    # unless the comment is None or an empty string
+    for key in ['public_comment', 'private_comment']:
+        key = 'public_comment' # convenience variable
+        if oldblock[key] != newblock[key] and newblock[key] not in ['', None]:
+            blockdata[key] = '\n'.join([oldblock[key], newblock[key]])
+
+    # How do we override an earlier block definition?
+    if mergeplan in ['max', None]:
+        # Use the highest block level found (the default)
+        log.debug(f"Using 'max' mergeplan.")
+
+        if SEVERITY[newblock['severity']] > SEVERITY[oldblock['severity']]:
+            log.debug(f"New block severity is higher. Using that.")
+            blockdata['severity'] = newblock['severity']
+
+        # If obfuscate is set and is True for the domain in
+        # any blocklist then obfuscate is set to false.
+        if newblock.get('obfuscate', False):
+            blockdata['obfuscate'] = True
+
+    elif mergeplan in ['min']:
+        # Use the lowest block level found
+        log.debug(f"Using 'min' mergeplan.")
+
+        if SEVERITY[newblock['severity']] < SEVERITY[oldblock['severity']]:
+            blockdata['severity'] = newblock['severity']
+
+        # If obfuscate is set and is False for the domain in
+        # any blocklist then obfuscate is set to False.
+        if not newblock.get('obfuscate', True):
+            blockdata['obfuscate'] = False
+
+    else:
+        raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")
+
+    log.debug(f"Block severity set to {blockdata['severity']}")
+    # Use the severity level to set rejections, if not defined in newblock
+    # If severity level is 'suspend', it doesn't matter what the settings is for
+    # 'reject_media' or 'reject_reports'
+    blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[blockdata['severity']])
+    blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[blockdata['severity']])
+
+    log.debug(f"set reject_media to: {blockdata['reject_media']}")
+    log.debug(f"set reject_reports to: {blockdata['reject_reports']}")
+
+    return blockdata
+
 def fetch_instance_blocklist(token: str, host: str) -> list:
     """Fetch existing block list from server
 
@@ -223,7 +275,7 @@ def add_block(token: str, host: str, blockdata: dict):
     if response.status_code != 200:
         raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")
 
-def push_blocklist(token: str, host: str, blocklist: list[dict]):
+def push_blocklist(token: str, host: str, blocklist: list[dict], dryrun: bool=False):
     """Push a blocklist to a remote instance.
 
     Merging the blocklist with the existing list the instance has,
@@ -240,14 +292,12 @@ def push_blocklist(token: str, host: str, blocklist: list[dict]):
     # Convert serverblocks to a dictionary keyed by domain name
     knownblocks = {row['domain']: row for row in serverblocks}
 
-    for row in blocklist:
-        # log.debug(f"Importing definition: {row}")
-
-        if 'id' in row: del row['id']
-
+    for newblock in blocklist:
+        log.debug(f"applying newblock: {newblock}")
         try:
-            blockdict = knownblocks[row['domain']]
-            log.debug(f"Block already exists for {row['domain']}, merging data...")
+            oldblock = knownblocks[newblock['domain']]
+            log.debug(f"Block already exists for {newblock['domain']}, merging data...")
 
             # Check if anything is actually different and needs updating
             change_needed = False
@@ -260,38 +310,53 @@ def push_blocklist(token: str, host: str, blocklist: list[dict]):
                 'obfuscate',
             ]:
                 try:
-                    if blockdict[key] != knownblocks[key]:
+                    log.debug(f"Compare {key} '{oldblock[key]}' <> '{newblock[key]}'")
+                    oldval = oldblock[key]
+                    newval = newblock[key]
+                    if oldval != newval:
+                        log.debug("Difference detected. Change needed.")
                         change_needed = True
                         break
 
                 except KeyError:
-                    break
+                    log.debug(f"KeyError comparing {key}")
+                    continue
 
             if change_needed:
-                log.debug(f"Change detected. Updating domain block for {row['domain']}")
-                blockdict.update(row)
-                update_known_block(token, host, blockdict)
-                # add a pause here so we don't melt the instance
-                time.sleep(1)
+                log.info(f"Change detected. Updating domain block for {oldblock['domain']}")
+                blockdata = oldblock.copy()
+                blockdata.update(newblock)
+                if not dryrun:
+                    update_known_block(token, host, blockdata)
+                    # add a pause here so we don't melt the instance
+                    time.sleep(1)
+                else:
+                    log.info("Dry run selected. Not applying changes.")
 
             else:
                 log.debug("No differences detected. Not updating.")
+                pass
 
         except KeyError:
-            # domain doesn't have an entry, so we need to create one
+            # This is a new block for the target instance, so we
+            # need to add a block rather than update an existing one
            blockdata = {
-                'domain': row['domain'],
+                'domain': newblock['domain'],
                 # Default to Silence if nothing is specified
-                'severity': row.get('severity', 'silence'),
-                'public_comment': row['public_comment'],
-                'private_comment': row['private_comment'],
-                'reject_media': row.get('reject_media', False),
-                'reject_reports': row.get('reject_reports', False),
-                'obfuscate': row.get('obfuscate', False),
+                'severity': newblock.get('severity', 'silence'),
+                'public_comment': newblock.get('public_comment', ''),
+                'private_comment': newblock.get('private_comment', ''),
+                'reject_media': newblock.get('reject_media', False),
+                'reject_reports': newblock.get('reject_reports', False),
+                'obfuscate': newblock.get('obfuscate', False),
             }
             log.info(f"Adding new block for {blockdata['domain']}...")
-            add_block(token, host, blockdata)
-            # add a pause here so we don't melt the instance
-            time.sleep(1)
+            if not dryrun:
+                add_block(token, host, blockdata)
+                # add a pause here so we don't melt the instance
+                time.sleep(1)
+            else:
+                log.info("Dry run selected. Not adding block.")
 
 def load_config(configfile: str):
     """Augment commandline arguments with config file parameters
@@ -359,6 +424,17 @@ def augment_args(args):
 
     return args
 
+def str2bool(boolstring: str) -> bool:
+    """Helper function to convert boolean strings to actual Python bools
+    """
+    boolstring = boolstring.lower()
+    if boolstring in ['true', 't', '1', 'y', 'yes']:
+        return True
+    elif boolstring in ['false', 'f', '0', 'n', 'no']:
+        return False
+    else:
+        raise ValueError(f"Cannot parse value '{boolstring}' as boolean")
+
 if __name__ == '__main__':
 
     ap = argparse.ArgumentParser(description="Bulk blocklist tool",
@@ -368,12 +444,14 @@ if __name__ == '__main__':
     ap.add_argument('-o', '--outfile', dest="blocklist_savefile", help="Save merged blocklist to a local file.")
     ap.add_argument('-S', '--save-intermediate', dest="save_intermediate", action='store_true', help="Save intermediate blocklists we fetch to local files.")
     ap.add_argument('-D', '--savedir', dest="savedir", help="Directory path to save intermediate lists.")
+    ap.add_argument('-m', '--mergeplan', choices=['min', 'max'], default='max', help="Set mergeplan.")
 
     ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.")
     ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.")
     ap.add_argument('--no-push-instance', dest='no_push_instance', action='store_true', help="Don't push to instances, even if configured.")
 
     ap.add_argument('--loglevel', choices=['debug', 'info', 'warning', 'error', 'critical'], help="Set log output level.")
+    ap.add_argument('--dryrun', action='store_true', help="Don't actually push updates, just show what would happen.")
 
     args = ap.parse_args()
     if args.loglevel is not None:
@@ -32,4 +32,10 @@ blocklist_instance_destinations = [
 # no_fetch_url = false
 
 ## Don't fetch blocklists from instances, even if they're defined above
 # no_fetch_instance = false
+
+## Set the mergeplan to use when dealing with overlaps between blocklists
+# The default 'max' mergeplan will use the harshest severity block found for a domain.
+# The 'min' mergeplan will use the lightest severity block found for a domain.
+# mergeplan = 'max'
+
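
For illustration only, and not part of the commit: a minimal sketch of how the 'max' and 'min' mergeplans introduced here decide which severity wins when two blocklists overlap, using the SEVERITY ordering added in the first hunk. The helper name resolve_severity is hypothetical.

# Illustrative sketch, not committed code: mirrors the severity logic in apply_mergeplan.
SEVERITY = {'noop': 0, 'silence': 1, 'suspend': 2}

def resolve_severity(old: str, new: str, mergeplan: str = 'max') -> str:
    """Pick which severity survives when two blocklists overlap."""
    if mergeplan == 'max':
        # 'max' keeps the harshest block level seen for a domain
        return new if SEVERITY[new] > SEVERITY[old] else old
    elif mergeplan == 'min':
        # 'min' keeps the lightest block level seen for a domain
        return new if SEVERITY[new] < SEVERITY[old] else old
    raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")

print(resolve_severity('silence', 'suspend', 'max'))  # suspend
print(resolve_severity('silence', 'suspend', 'min'))  # silence

The commit wires the same choice through the new '-m'/'--mergeplan' command-line option, and the new '--dryrun' flag logs what would change without calling update_known_block or add_block.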