Added ability to read domain_blocks from

instances that make the list public.
Skip obfuscated domains when building the merged blocklist.
Removed some redundant commented code.
This commit is contained in:
Justin Warren 2023-01-09 12:48:11 +11:00
parent 4bf68019fb
commit 4ef84b5275
No known key found for this signature in database
2 changed files with 47 additions and 37 deletions

View File

@ -90,8 +90,9 @@ def sync_blocklists(conf: dict):
log.info("Fetching domain blocks from instances...") log.info("Fetching domain blocks from instances...")
for blocklist_src in conf.blocklist_instance_sources: for blocklist_src in conf.blocklist_instance_sources:
domain = blocklist_src['domain'] domain = blocklist_src['domain']
token = blocklist_src['token'] admin = blocklist_src.get('admin', False)
blocklists[domain] = fetch_instance_blocklist(token, domain, import_fields) token = blocklist_src.get('token', None)
blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
if conf.save_intermediate: if conf.save_intermediate:
save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields) save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
@ -119,25 +120,22 @@ def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
merged = {} merged = {}
for key, blist in blocklists.items(): for key, blist in blocklists.items():
log.debug(f"processing key {key} blist...") log.debug(f"processing blocklist from: {key} ...")
for newblock in blist: for newblock in blist:
domain = newblock['domain'] domain = newblock['domain']
if domain in merged: # If the domain has two asterisks in it, it's obfuscated
# and we can't really use it, so skip it and do the next one
if '**' in domain:
log.debug(f"Domain '{domain}' is obfuscated. Skipping it.")
continue
elif domain in merged:
log.debug(f"Overlapping block for domain {domain}. Merging...") log.debug(f"Overlapping block for domain {domain}. Merging...")
blockdata = apply_mergeplan(merged[domain], newblock, mergeplan) blockdata = apply_mergeplan(merged[domain], newblock, mergeplan)
else: else:
# New block # New block
blockdata = newblock blockdata = newblock
# blockdata = {
# 'domain': newblock['domain'],
# # Default to Silence if nothing is specified
# 'severity': newblock.get('severity', 'silence'),
# 'public_comment': newblock.get('public_comment', ''),
# 'obfuscate': newblock.get('obfuscate', True), # default obfuscate to True
# }
# sev = blockdata['severity'] # convenience variable
# blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[sev])
# blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[sev])
# end if # end if
log.debug(f"blockdata is: {blockdata}") log.debug(f"blockdata is: {blockdata}")
@ -161,7 +159,9 @@ def apply_mergeplan(oldblock: dict, newblock: dict, mergeplan: str='max') -> dic
keylist = ['public_comment', 'private_comment'] keylist = ['public_comment', 'private_comment']
for key in keylist: for key in keylist:
try: try:
if oldblock[key] != newblock[key] and newblock[key] not in ['', None]: if oldblock[key] not in ['', None] and newblock[key] not in ['', None] and oldblock[key] != newblock[key]:
log.debug(f"old comment: '{oldblock[key]}'")
log.debug(f"new comment: '{newblock[key]}'")
blockdata[key] = ', '.join([oldblock[key], newblock[key]]) blockdata[key] = ', '.join([oldblock[key], newblock[key]])
except KeyError: except KeyError:
log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...") log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
@ -197,28 +197,30 @@ def apply_mergeplan(oldblock: dict, newblock: dict, mergeplan: str='max') -> dic
raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.") raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")
log.debug(f"Block severity set to {blockdata['severity']}") log.debug(f"Block severity set to {blockdata['severity']}")
# Use the severity level to set rejections, if not defined in newblock
# If severity level is 'suspend', it doesn't matter what the settings is for
# 'reject_media' or 'reject_reports'
# blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[blockdata['severity']])
# blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[blockdata['severity']])
# log.debug(f"set reject_media to: {blockdata['reject_media']}")
# log.debug(f"set reject_reports to: {blockdata['reject_reports']}")
return blockdata return blockdata
def fetch_instance_blocklist(token: str, host: str, def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
import_fields: list=['domain', 'severity']) -> list: import_fields: list=['domain', 'severity']) -> list:
"""Fetch existing block list from server """Fetch existing block list from server
@param token: The OAuth Bearer token to authenticate with.
@param host: The remote host to connect to. @param host: The remote host to connect to.
@param token: The (optional) OAuth Bearer token to authenticate with.
@param admin: Boolean flag to use the admin API if True.
@param import_fields: A list of fields to import from the remote instance. @param import_fields: A list of fields to import from the remote instance.
@returns: A list of the admin domain blocks from the instance. @returns: A list of the domain blocks from the instance.
""" """
log.info(f"Fetching instance blocklist from {host} ...") log.info(f"Fetching instance blocklist from {host} ...")
if admin:
api_path = "/api/v1/admin/domain_blocks" api_path = "/api/v1/admin/domain_blocks"
else:
api_path = "/api/v1/instance/domain_blocks"
if token:
headers = {'Authorization': f"Bearer {token}"}
else:
headers = {}
url = f"https://{host}{api_path}" url = f"https://{host}{api_path}"
@ -226,16 +228,19 @@ def fetch_instance_blocklist(token: str, host: str,
link = True link = True
while link: while link:
response = requests.get(url, headers={'Authorization': f"Bearer {token}"}) response = requests.get(url, headers=headers)
if response.status_code != 200: if response.status_code != 200:
log.error(f"Cannot fetch remote blocklist: {response.content}") log.error(f"Cannot fetch remote blocklist: {response.content}")
raise ValueError("Unable to fetch domain block list: %s", response) raise ValueError("Unable to fetch domain block list: %s", response)
domain_blocks.extend(json.loads(response.content)) domain_blocks.extend(json.loads(response.content))
# Parse the link header to find the next url to fetch # Parse the link header to find the next url to fetch
# This is a weird and janky way of doing pagination but # This is a weird and janky way of doing pagination but
# hey nothing we can do about it we just have to deal # hey nothing we can do about it we just have to deal
link = response.headers['Link'] link = response.headers.get('Link', None)
if link is None:
break
pagination = link.split(', ') pagination = link.split(', ')
if len(pagination) != 2: if len(pagination) != 2:
link = None link = None
@ -321,7 +326,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
""" """
log.info(f"Pushing blocklist to host {host} ...") log.info(f"Pushing blocklist to host {host} ...")
# Fetch the existing blocklist from the instance # Fetch the existing blocklist from the instance
serverblocks = fetch_instance_blocklist(token, host, import_fields) # Force use of the admin API
serverblocks = fetch_instance_blocklist(host, token, True, import_fields)
# Convert serverblocks to a dictionary keyed by domain name # Convert serverblocks to a dictionary keyed by domain name
knownblocks = {row['domain']: row for row in serverblocks} knownblocks = {row['domain']: row for row in serverblocks}
@ -329,8 +335,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
for newblock in blocklist: for newblock in blocklist:
log.debug(f"applying newblock: {newblock}") log.debug(f"applying newblock: {newblock}")
try: oldblock = knownblocks.get(newblock['domain'], None)
oldblock = knownblocks[newblock['domain']] if oldblock:
log.debug(f"Block already exists for {newblock['domain']}, checking for differences...") log.debug(f"Block already exists for {newblock['domain']}, checking for differences...")
# Check if anything is actually different and needs updating # Check if anything is actually different and needs updating
@ -366,7 +372,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
log.debug("No differences detected. Not updating.") log.debug("No differences detected. Not updating.")
pass pass
except KeyError: else:
# This is a new block for the target instance, so we # This is a new block for the target instance, so we
# need to add a block rather than update an existing one # need to add a block rather than update an existing one
blockdata = { blockdata = {

View File

@ -1,8 +1,12 @@
# List of instances to read blocklists from, # List of instances to read blocklists from.
# with the Bearer token authorised by the instance # If the instance makes its blocklist public, no authorization token is needed.
# Otherwise, `token` is a Bearer token authorised to read domain_blocks.
# If `admin` = True, use the more detailed admin API, which requires a token with a
# higher level of authorization.
blocklist_instance_sources = [ blocklist_instance_sources = [
# { domain = 'eigenmagic.net', token = '<a_token_with_read_auth>' }, # { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
# { domain = 'jorts.horse', token = '<a_different_token>' }, # { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
# { domain = 'eigenmagic.net', token = '<a_token_with_read_auth>', admin = true }, # admin access required
] ]
# List of URLs to read csv blocklists from # List of URLs to read csv blocklists from