Merge pull request #8 from eigenmagic/public-block-fetch
Read from instances that publish domain_blocks.
This commit is contained in:
commit
a134870f14
|
@ -90,8 +90,9 @@ def sync_blocklists(conf: dict):
|
||||||
log.info("Fetching domain blocks from instances...")
|
log.info("Fetching domain blocks from instances...")
|
||||||
for blocklist_src in conf.blocklist_instance_sources:
|
for blocklist_src in conf.blocklist_instance_sources:
|
||||||
domain = blocklist_src['domain']
|
domain = blocklist_src['domain']
|
||||||
token = blocklist_src['token']
|
admin = blocklist_src.get('admin', False)
|
||||||
blocklists[domain] = fetch_instance_blocklist(token, domain, import_fields)
|
token = blocklist_src.get('token', None)
|
||||||
|
blocklists[domain] = fetch_instance_blocklist(domain, token, admin, import_fields)
|
||||||
if conf.save_intermediate:
|
if conf.save_intermediate:
|
||||||
save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
|
save_intermediate_blocklist(blocklists[domain], domain, conf.savedir, export_fields)
|
||||||
|
|
||||||
|
@ -119,25 +120,22 @@ def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
||||||
merged = {}
|
merged = {}
|
||||||
|
|
||||||
for key, blist in blocklists.items():
|
for key, blist in blocklists.items():
|
||||||
log.debug(f"processing key {key} blist...")
|
log.debug(f"processing blocklist from: {key} ...")
|
||||||
for newblock in blist:
|
for newblock in blist:
|
||||||
domain = newblock['domain']
|
domain = newblock['domain']
|
||||||
if domain in merged:
|
# If the domain has two asterisks in it, it's obfuscated
|
||||||
|
# and we can't really use it, so skip it and do the next one
|
||||||
|
if '**' in domain:
|
||||||
|
log.debug(f"Domain '{domain}' is obfuscated. Skipping it.")
|
||||||
|
continue
|
||||||
|
|
||||||
|
elif domain in merged:
|
||||||
log.debug(f"Overlapping block for domain {domain}. Merging...")
|
log.debug(f"Overlapping block for domain {domain}. Merging...")
|
||||||
blockdata = apply_mergeplan(merged[domain], newblock, mergeplan)
|
blockdata = apply_mergeplan(merged[domain], newblock, mergeplan)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# New block
|
# New block
|
||||||
blockdata = newblock
|
blockdata = newblock
|
||||||
# blockdata = {
|
|
||||||
# 'domain': newblock['domain'],
|
|
||||||
# # Default to Silence if nothing is specified
|
|
||||||
# 'severity': newblock.get('severity', 'silence'),
|
|
||||||
# 'public_comment': newblock.get('public_comment', ''),
|
|
||||||
# 'obfuscate': newblock.get('obfuscate', True), # default obfuscate to True
|
|
||||||
# }
|
|
||||||
# sev = blockdata['severity'] # convenience variable
|
|
||||||
# blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[sev])
|
|
||||||
# blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[sev])
|
|
||||||
|
|
||||||
# end if
|
# end if
|
||||||
log.debug(f"blockdata is: {blockdata}")
|
log.debug(f"blockdata is: {blockdata}")
|
||||||
|
@ -161,7 +159,9 @@ def apply_mergeplan(oldblock: dict, newblock: dict, mergeplan: str='max') -> dic
|
||||||
keylist = ['public_comment', 'private_comment']
|
keylist = ['public_comment', 'private_comment']
|
||||||
for key in keylist:
|
for key in keylist:
|
||||||
try:
|
try:
|
||||||
if oldblock[key] != newblock[key] and newblock[key] not in ['', None]:
|
if oldblock[key] not in ['', None] and newblock[key] not in ['', None] and oldblock[key] != newblock[key]:
|
||||||
|
log.debug(f"old comment: '{oldblock[key]}'")
|
||||||
|
log.debug(f"new comment: '{newblock[key]}'")
|
||||||
blockdata[key] = ', '.join([oldblock[key], newblock[key]])
|
blockdata[key] = ', '.join([oldblock[key], newblock[key]])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
|
log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
|
||||||
|
@ -197,28 +197,30 @@ def apply_mergeplan(oldblock: dict, newblock: dict, mergeplan: str='max') -> dic
|
||||||
raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")
|
raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.")
|
||||||
|
|
||||||
log.debug(f"Block severity set to {blockdata['severity']}")
|
log.debug(f"Block severity set to {blockdata['severity']}")
|
||||||
# Use the severity level to set rejections, if not defined in newblock
|
|
||||||
# If severity level is 'suspend', it doesn't matter what the setting is for
|
|
||||||
# 'reject_media' or 'reject_reports'
|
|
||||||
# blockdata['reject_media'] = newblock.get('reject_media', REJECT_MEDIA_DEFAULT[blockdata['severity']])
|
|
||||||
# blockdata['reject_reports'] = newblock.get('reject_reports', REJECT_REPORTS_DEFAULT[blockdata['severity']])
|
|
||||||
|
|
||||||
# log.debug(f"set reject_media to: {blockdata['reject_media']}")
|
|
||||||
# log.debug(f"set reject_reports to: {blockdata['reject_reports']}")
|
|
||||||
|
|
||||||
return blockdata
|
return blockdata
|
||||||
|
|
||||||
def fetch_instance_blocklist(token: str, host: str,
|
def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
|
||||||
import_fields: list=['domain', 'severity']) -> list:
|
import_fields: list=['domain', 'severity']) -> list:
|
||||||
"""Fetch existing block list from server
|
"""Fetch existing block list from server
|
||||||
|
|
||||||
@param token: The OAuth Bearer token to authenticate with.
|
|
||||||
@param host: The remote host to connect to.
|
@param host: The remote host to connect to.
|
||||||
|
@param token: The (optional) OAuth Bearer token to authenticate with.
|
||||||
|
@param admin: Boolean flag to use the admin API if True.
|
||||||
@param import_fields: A list of fields to import from the remote instance.
|
@param import_fields: A list of fields to import from the remote instance.
|
||||||
@returns: A list of the admin domain blocks from the instance.
|
@returns: A list of the domain blocks from the instance.
|
||||||
"""
|
"""
|
||||||
log.info(f"Fetching instance blocklist from {host} ...")
|
log.info(f"Fetching instance blocklist from {host} ...")
|
||||||
|
|
||||||
|
if admin:
|
||||||
api_path = "/api/v1/admin/domain_blocks"
|
api_path = "/api/v1/admin/domain_blocks"
|
||||||
|
else:
|
||||||
|
api_path = "/api/v1/instance/domain_blocks"
|
||||||
|
|
||||||
|
if token:
|
||||||
|
headers = {'Authorization': f"Bearer {token}"}
|
||||||
|
else:
|
||||||
|
headers = {}
|
||||||
|
|
||||||
url = f"https://{host}{api_path}"
|
url = f"https://{host}{api_path}"
|
||||||
|
|
||||||
|
@ -226,16 +228,19 @@ def fetch_instance_blocklist(token: str, host: str,
|
||||||
link = True
|
link = True
|
||||||
|
|
||||||
while link:
|
while link:
|
||||||
response = requests.get(url, headers={'Authorization': f"Bearer {token}"})
|
response = requests.get(url, headers=headers)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
log.error(f"Cannot fetch remote blocklist: {response.content}")
|
log.error(f"Cannot fetch remote blocklist: {response.content}")
|
||||||
raise ValueError("Unable to fetch domain block list: %s", response)
|
raise ValueError("Unable to fetch domain block list: %s", response)
|
||||||
|
|
||||||
domain_blocks.extend(json.loads(response.content))
|
domain_blocks.extend(json.loads(response.content))
|
||||||
|
|
||||||
# Parse the link header to find the next url to fetch
|
# Parse the link header to find the next url to fetch
|
||||||
# This is a weird and janky way of doing pagination but
|
# This is a weird and janky way of doing pagination but
|
||||||
# hey nothing we can do about it we just have to deal
|
# hey nothing we can do about it we just have to deal
|
||||||
link = response.headers['Link']
|
link = response.headers.get('Link', None)
|
||||||
|
if link is None:
|
||||||
|
break
|
||||||
pagination = link.split(', ')
|
pagination = link.split(', ')
|
||||||
if len(pagination) != 2:
|
if len(pagination) != 2:
|
||||||
link = None
|
link = None
|
||||||
|
@ -321,7 +326,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
||||||
"""
|
"""
|
||||||
log.info(f"Pushing blocklist to host {host} ...")
|
log.info(f"Pushing blocklist to host {host} ...")
|
||||||
# Fetch the existing blocklist from the instance
|
# Fetch the existing blocklist from the instance
|
||||||
serverblocks = fetch_instance_blocklist(token, host, import_fields)
|
# Force use of the admin API
|
||||||
|
serverblocks = fetch_instance_blocklist(host, token, True, import_fields)
|
||||||
|
|
||||||
# Convert serverblocks to a dictionary keyed by domain name
|
# Convert serverblocks to a dictionary keyed by domain name
|
||||||
knownblocks = {row['domain']: row for row in serverblocks}
|
knownblocks = {row['domain']: row for row in serverblocks}
|
||||||
|
@ -329,8 +335,8 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
||||||
for newblock in blocklist:
|
for newblock in blocklist:
|
||||||
|
|
||||||
log.debug(f"applying newblock: {newblock}")
|
log.debug(f"applying newblock: {newblock}")
|
||||||
try:
|
oldblock = knownblocks.get(newblock['domain'], None)
|
||||||
oldblock = knownblocks[newblock['domain']]
|
if oldblock:
|
||||||
log.debug(f"Block already exists for {newblock['domain']}, checking for differences...")
|
log.debug(f"Block already exists for {newblock['domain']}, checking for differences...")
|
||||||
|
|
||||||
# Check if anything is actually different and needs updating
|
# Check if anything is actually different and needs updating
|
||||||
|
@ -366,7 +372,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
|
||||||
log.debug("No differences detected. Not updating.")
|
log.debug("No differences detected. Not updating.")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
except KeyError:
|
else:
|
||||||
# This is a new block for the target instance, so we
|
# This is a new block for the target instance, so we
|
||||||
# need to add a block rather than update an existing one
|
# need to add a block rather than update an existing one
|
||||||
blockdata = {
|
blockdata = {
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
# List of instances to read blocklists from,
|
# List of instances to read blocklists from.
|
||||||
# with the Bearer token authorised by the instance
|
# If the instance makes its blocklist public, no authorization token is needed.
|
||||||
|
# Otherwise, `token` is a Bearer token authorised to read domain_blocks.
|
||||||
|
# If `admin = true`, use the more detailed admin API, which requires a token with a
|
||||||
|
# higher level of authorization.
|
||||||
blocklist_instance_sources = [
|
blocklist_instance_sources = [
|
||||||
# { domain = 'eigenmagic.net', token = '<a_token_with_read_auth>' },
|
# { domain = 'public.blocklist'}, # an instance with a public list of domain_blocks
|
||||||
# { domain = 'jorts.horse', token = '<a_different_token>' },
|
# { domain = 'jorts.horse', token = '<a_different_token>' }, # user accessible block list
|
||||||
|
# { domain = 'eigenmagic.net', token = '<a_token_with_read_auth>', admin = true }, # admin access required
|
||||||
]
|
]
|
||||||
|
|
||||||
# List of URLs to read csv blocklists from
|
# List of URLs to read csv blocklists from
|
||||||
|
|
Loading…
Reference in New Issue