Added ability to fetch blocklist CSVs via URL.
Added ability to save blocklists to file. Added ability to skip fetch and push actions.
This commit is contained in:
parent
c8e8aa50dc
commit
96a48ec633
|
@ -6,13 +6,16 @@ import toml
|
||||||
import csv
|
import csv
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
import csv
|
|
||||||
import time
|
import time
|
||||||
|
import os.path
|
||||||
|
import urllib.request as urlr
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
logging.basicConfig(level=logging.DEBUG,
|
logging.basicConfig(level=logging.INFO,
|
||||||
format='%(asctime)s %(levelname)s %(message)s')
|
format='%(asctime)s %(levelname)s %(message)s')
|
||||||
import pprint
|
|
||||||
|
# Max size of a URL-fetched blocklist
|
||||||
|
URL_BLOCKLIST_MAXSIZE = 1024 ** 3
|
||||||
|
|
||||||
log = logging.getLogger('fediblock_sync')
|
log = logging.getLogger('fediblock_sync')
|
||||||
|
|
||||||
|
@ -20,34 +23,49 @@ CONFIGFILE = "/home/mastodon/etc/admin.conf"
|
||||||
|
|
||||||
def sync_blocklists(conf: dict):
|
def sync_blocklists(conf: dict):
|
||||||
"""Sync instance blocklists from remote sources.
|
"""Sync instance blocklists from remote sources.
|
||||||
|
|
||||||
|
@param conf: The parsed commandline arguments, augmented with config file values. Options are read as attributes (e.g. conf.no_fetch_url).
|
||||||
"""
|
"""
|
||||||
# Build a dict of blocklists we retrieve from remote sources.
|
# Build a dict of blocklists we retrieve from remote sources.
|
||||||
# We will merge these later using a merge algorithm we choose.
|
# We will merge these later using a merge algorithm we choose.
|
||||||
|
|
||||||
blocklists = {}
|
blocklists = {}
|
||||||
# Fetch blocklists from URLs
|
# Fetch blocklists from URLs
|
||||||
# for listurl in conf.blocklist_csv_sources:
|
if not conf.no_fetch_url:
|
||||||
# blocklists[listurl] = {}
|
log.info("Fetching domain blocks from URLs...")
|
||||||
# response = requests.get(url)
|
for listurl in conf.blocklist_url_sources:
|
||||||
# log.debug(f"Fetched blocklist CSV file: {response.content}")
|
blocklists[listurl] = []
|
||||||
|
with urlr.urlopen(listurl) as fp:
|
||||||
|
rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8')
|
||||||
|
reader = csv.DictReader(rawdata.split('\n'))
|
||||||
|
for row in reader:
|
||||||
|
blocklists[listurl].append(row)
|
||||||
|
if conf.save_intermediate:
|
||||||
|
save_intermediate_blocklist(blocklists[listurl], listurl, conf.savedir)
|
||||||
|
|
||||||
# Fetch blocklists from remote instances
|
# Fetch blocklists from remote instances
|
||||||
for blocklist_src in conf['blocklist_instance_sources']:
|
if not conf.no_fetch_instance:
|
||||||
domain = blocklist_src['domain']
|
log.info("Fetching domain blocks from instances...")
|
||||||
token = blocklist_src['token']
|
for blocklist_src in conf.blocklist_instance_sources:
|
||||||
blocklists[domain] = fetch_instance_blocklist(token, domain)
|
domain = blocklist_src['domain']
|
||||||
|
token = blocklist_src['token']
|
||||||
|
blocklists[domain] = fetch_instance_blocklist(token, domain)
|
||||||
|
if conf.save_intermediate:
|
||||||
|
save_intermediate_blocklist(blocklists[domain], domain, conf.savedir)
|
||||||
|
|
||||||
# Merge blocklists into an update dict
|
# Merge blocklists into an update dict
|
||||||
merged = merge_blocklists(blocklists)
|
merged = merge_blocklists(blocklists)
|
||||||
|
if conf.blocklist_savefile:
|
||||||
# log.debug(f"Merged blocklist ready:\n")
|
log.info(f"Saving merged blocklist to {conf.blocklist_savefile}")
|
||||||
# pprint.pp(merged)
|
save_blocklist_to_file(merged.values(), conf.blocklist_savefile)
|
||||||
|
|
||||||
# Push the blocklist to destination instances
|
# Push the blocklist to destination instances
|
||||||
for dest in conf['blocklist_instance_destinations']:
|
if not conf.no_push_instance:
|
||||||
domain = dest['domain']
|
log.info("Pushing domain blocks to instances...")
|
||||||
token = dest['token']
|
for dest in conf.blocklist_instance_destinations:
|
||||||
push_blocklist(token, domain, merged.values())
|
domain = dest['domain']
|
||||||
|
token = dest['token']
|
||||||
|
push_blocklist(token, domain, merged.values())
|
||||||
|
|
||||||
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
||||||
"""Merge fetched remote blocklists into a bulk update
|
"""Merge fetched remote blocklists into a bulk update
|
||||||
|
@ -56,14 +74,10 @@ def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
||||||
'max' (the default) uses the highest severity block found
|
'max' (the default) uses the highest severity block found
|
||||||
'min' uses the lowest severity block found
|
'min' uses the lowest severity block found
|
||||||
"""
|
"""
|
||||||
# log.debug(f"Merging blocklists {blocklists} ...")
|
|
||||||
# Remote blocklists may have conflicting overlaps. We need to
|
|
||||||
# decide whether or not to override earlier entries with later
|
|
||||||
# ones or not. How to choose which entry is 'correct'?
|
|
||||||
merged = {}
|
merged = {}
|
||||||
|
|
||||||
for key, blist in blocklists.items():
|
for key, blist in blocklists.items():
|
||||||
log.debug(f"Adding blocks from {key} ...")
|
log.debug(f"Merging blocks from {key} ...")
|
||||||
for blockdef in blist:
|
for blockdef in blist:
|
||||||
# log.debug(f"Checking blockdef {blockdef} ...")
|
# log.debug(f"Checking blockdef {blockdef} ...")
|
||||||
domain = blockdef['domain']
|
domain = blockdef['domain']
|
||||||
|
@ -122,7 +136,12 @@ def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict:
|
||||||
|
|
||||||
def fetch_instance_blocklist(token: str, host: str) -> list:
|
def fetch_instance_blocklist(token: str, host: str) -> list:
|
||||||
"""Fetch existing block list from server
|
"""Fetch existing block list from server
|
||||||
|
|
||||||
|
@param token: The OAuth Bearer token to authenticate with.
|
||||||
|
@param host: The remote host to connect to.
|
||||||
|
@returns: A list of the admin domain blocks from the instance.
|
||||||
"""
|
"""
|
||||||
|
log.info(f"Fetching instance blocklist from {host} ...")
|
||||||
api_path = "/api/v1/admin/domain_blocks"
|
api_path = "/api/v1/admin/domain_blocks"
|
||||||
|
|
||||||
url = f"https://{host}{api_path}"
|
url = f"https://{host}{api_path}"
|
||||||
|
@ -155,28 +174,6 @@ def fetch_instance_blocklist(token: str, host: str) -> list:
|
||||||
log.debug(f"Found {len(domain_blocks)} existing domain blocks.")
|
log.debug(f"Found {len(domain_blocks)} existing domain blocks.")
|
||||||
return domain_blocks
|
return domain_blocks
|
||||||
|
|
||||||
def export_blocklist(token: str, host: str, outfile: str):
|
|
||||||
"""Export current server blocklist to a csv file"""
|
|
||||||
blocklist = fetch_instance_blocklist(token, host)
|
|
||||||
fieldnames = ['id', 'domain', 'severity', 'reject_media', 'reject_reports', 'private_comment', 'public_comment', 'obfuscate']
|
|
||||||
|
|
||||||
blocklist = sorted(blocklist, key=lambda x: int(x['id']))
|
|
||||||
|
|
||||||
with open(outfile, "w") as fp:
|
|
||||||
writer = csv.DictWriter(fp, fieldnames, extrasaction='ignore')
|
|
||||||
writer.writeheader()
|
|
||||||
writer.writerows(blocklist)
|
|
||||||
|
|
||||||
def delete_blocklist(token: str, host: str, blockfile: str):
|
|
||||||
"""Delete domain blocks listed in blockfile"""
|
|
||||||
with open(blockfile) as fp:
|
|
||||||
reader = csv.DictReader(fp)
|
|
||||||
for row in reader:
|
|
||||||
domain = row['domain']
|
|
||||||
id = row['id']
|
|
||||||
log.debug(f"Deleting {domain} (id: {id}) from blocklist...")
|
|
||||||
delete_block(token, host, id)
|
|
||||||
|
|
||||||
def delete_block(token: str, host: str, id: int):
|
def delete_block(token: str, host: str, id: int):
|
||||||
"""Remove a domain block"""
|
"""Remove a domain block"""
|
||||||
log.debug(f"Removing domain block {id} at {host}...")
|
log.debug(f"Removing domain block {id} at {host}...")
|
||||||
|
@ -236,6 +233,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict]):
|
||||||
@param host: The instance host, FQDN or IP
|
@param host: The instance host, FQDN or IP
|
||||||
@param blocklist: A list of block definitions. They must include the domain.
|
@param blocklist: A list of block definitions. They must include the domain.
|
||||||
"""
|
"""
|
||||||
|
log.info(f"Pushing blocklist to host {host} ...")
|
||||||
# Fetch the existing blocklist from the instance
|
# Fetch the existing blocklist from the instance
|
||||||
serverblocks = fetch_instance_blocklist(token, host)
|
serverblocks = fetch_instance_blocklist(token, host)
|
||||||
|
|
||||||
|
@ -249,7 +247,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict]):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
blockdict = knownblocks[row['domain']]
|
blockdict = knownblocks[row['domain']]
|
||||||
log.info(f"Block already exists for {row['domain']}, merging data...")
|
log.debug(f"Block already exists for {row['domain']}, merging data...")
|
||||||
|
|
||||||
# Check if anything is actually different and needs updating
|
# Check if anything is actually different and needs updating
|
||||||
change_needed = False
|
change_needed = False
|
||||||
|
@ -276,7 +274,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict]):
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
log.info("No differences detected. Not updating.")
|
log.debug("No differences detected. Not updating.")
|
||||||
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
# domain doesn't have an entry, so we need to create one
|
# domain doesn't have an entry, so we need to create one
|
||||||
|
@ -303,12 +301,59 @@ def load_config(configfile: str):
|
||||||
conf = toml.load(configfile)
|
conf = toml.load(configfile)
|
||||||
return conf
|
return conf
|
||||||
|
|
||||||
def save_intermediate(blocklist: list, source: str, filedir: str):
|
def save_intermediate_blocklist(blocklist: list[dict], source: str, filedir: str):
|
||||||
|
"""Save a local copy of a blocklist we've downloaded
|
||||||
|
"""
|
||||||
|
# Invent a filename based on the remote source
|
||||||
|
# If the source was a URL, convert it to something less messy
|
||||||
|
# If the source was a remote domain, just use the name of the domain
|
||||||
|
log.debug(f"Saving intermediate blocklist from {source}")
|
||||||
|
source = source.replace('/','-')
|
||||||
|
filename = f"{source}.csv"
|
||||||
|
filepath = os.path.join(filedir, filename)
|
||||||
|
save_blocklist_to_file(blocklist, filepath)
|
||||||
|
|
||||||
|
def save_blocklist_to_file(blocklist: list[dict], filepath: str):
|
||||||
"""Save a blocklist we've downloaded from a remote source
|
"""Save a blocklist we've downloaded from a remote source
|
||||||
|
|
||||||
Save a local copy of the remote blocklist after downloading it.
|
@param blocklist: An iterable of block definition dicts, each of which must include a 'domain' key.
|
||||||
|
@param filepath: The path to the file the list should be saved in.
|
||||||
"""
|
"""
|
||||||
|
blocklist = sorted(blocklist, key=lambda x: x['domain'])
|
||||||
|
|
||||||
|
fieldnames = ['domain', 'severity', 'private_comment', 'public_comment', 'reject_media', 'reject_reports', 'obfuscate']
|
||||||
|
with open(filepath, "w") as fp:
|
||||||
|
writer = csv.DictWriter(fp, fieldnames, extrasaction='ignore')
|
||||||
|
writer.writeheader()
|
||||||
|
writer.writerows(blocklist)
|
||||||
|
|
||||||
|
def augment_args(args):
|
||||||
|
"""Augment commandline arguments with config file parameters"""
|
||||||
|
conf = toml.load(args.config)
|
||||||
|
|
||||||
|
if not args.no_fetch_url:
|
||||||
|
args.no_fetch_url = conf.get('no_fetch_url', False)
|
||||||
|
|
||||||
|
if not args.no_fetch_instance:
|
||||||
|
args.no_fetch_instance = conf.get('no_fetch_instance', False)
|
||||||
|
|
||||||
|
if not args.no_push_instance:
|
||||||
|
args.no_push_instance = conf.get('no_push_instance', False)
|
||||||
|
|
||||||
|
if not args.blocklist_savefile:
|
||||||
|
args.blocklist_savefile = conf.get('blocklist_savefile', None)
|
||||||
|
|
||||||
|
if not args.save_intermediate:
|
||||||
|
args.save_intermediate = conf.get('save_intermediate', False)
|
||||||
|
|
||||||
|
if not args.savedir:
|
||||||
|
args.savedir = conf.get('savedir', '/tmp')
|
||||||
|
|
||||||
|
args.blocklist_url_sources = conf.get('blocklist_url_sources')
|
||||||
|
args.blocklist_instance_sources = conf.get('blocklist_instance_sources')
|
||||||
|
args.blocklist_instance_destinations = conf.get('blocklist_instance_destinations')
|
||||||
|
|
||||||
|
return args
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
@ -316,6 +361,14 @@ if __name__ == '__main__':
|
||||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||||
ap.add_argument('-c', '--config', default='/etc/default/fediblockhole.conf.toml', help="Config file")
|
ap.add_argument('-c', '--config', default='/etc/default/fediblockhole.conf.toml', help="Config file")
|
||||||
|
|
||||||
|
ap.add_argument('-o', '--outfile', dest="blocklist_savefile", help="Save merged blocklist to a local file.")
|
||||||
|
ap.add_argument('-S', '--save-intermediate', dest="save_intermediate", action='store_true', help="Save intermediate blocklists we fetch to local files.")
|
||||||
|
ap.add_argument('-D', '--savedir', dest="savedir", help="Directory path to save intermediate lists.")
|
||||||
|
|
||||||
|
ap.add_argument('--no-fetch-url', dest='no_fetch_url', action='store_true', help="Don't fetch from URLs, even if configured.")
|
||||||
|
ap.add_argument('--no-fetch-instance', dest='no_fetch_instance', action='store_true', help="Don't fetch from instances, even if configured.")
|
||||||
|
ap.add_argument('--no-push-instance', dest='no_push_instance', action='store_true', help="Don't push to instances, even if configured.")
|
||||||
|
|
||||||
ap.add_argument('--loglevel', choices=['debug', 'info', 'warning', 'error', 'critical'], help="Set log output level.")
|
ap.add_argument('--loglevel', choices=['debug', 'info', 'warning', 'error', 'critical'], help="Set log output level.")
|
||||||
|
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
@ -324,7 +377,7 @@ if __name__ == '__main__':
|
||||||
log.setLevel(getattr(logging, levelname))
|
log.setLevel(getattr(logging, levelname))
|
||||||
|
|
||||||
# Load the configuration file
|
# Load the configuration file
|
||||||
conf = load_config(args.config)
|
args = augment_args(args)
|
||||||
|
|
||||||
# Do the work of syncing
|
# Do the work of syncing
|
||||||
sync_blocklists(conf)
|
sync_blocklists(args)
|
|
@ -1,20 +1,35 @@
|
||||||
# List of instances to read blocklists from,
|
# List of instances to read blocklists from,
|
||||||
# with the Bearer token authorised by the instance
|
# with the Bearer token authorised by the instance
|
||||||
blocklist_instance_sources = [
|
blocklist_instance_sources = [
|
||||||
{
|
{ domain = 'eigenmagic.net', token = '<a_token_with_read_auth>' },
|
||||||
domain = 'eigenmagic.net',
|
{ domain = 'jorts.horse', token = '<a_different_token>' },
|
||||||
token = '<a_token_with_read_auth>'
|
]
|
||||||
},
|
|
||||||
{
|
# List of URLs to read csv blocklists from
|
||||||
domain = 'jorts.horse',
|
blocklist_url_sources = [
|
||||||
token = '<a_different_token>'
|
'file:///etc/fediblockhole/blocklist-01.csv',
|
||||||
},
|
'https://github.com/fediblockhole/samples/demo-blocklist-01.csv',
|
||||||
]
|
]
|
||||||
|
|
||||||
# List of instances to write blocklist to
|
# List of instances to write blocklist to
|
||||||
blocklist_instance_destinations = [
|
blocklist_instance_destinations = [
|
||||||
{
|
{ domain = 'eigenmagic.net', token = '<read_write_token>' },
|
||||||
domain = 'eigenmagic.net',
|
]
|
||||||
token = '<read_write_token>'
|
|
||||||
},
|
## Store a local copy of the remote blocklists after we fetch them
|
||||||
]
|
# save_intermediate = true
|
||||||
|
|
||||||
|
## Directory to store the local blocklist copies
|
||||||
|
# savedir = '/tmp'
|
||||||
|
|
||||||
|
## File to save the fully merged blocklist into
|
||||||
|
# blocklist_savefile = '/tmp/merged_blocklist.csv'
|
||||||
|
|
||||||
|
## Don't push blocklist to instances, even if they're defined above
|
||||||
|
# no_push_instance = false
|
||||||
|
|
||||||
|
## Don't fetch blocklists from URLs, even if they're defined above
|
||||||
|
# no_fetch_url = false
|
||||||
|
|
||||||
|
## Don't fetch blocklists from instances, even if they're defined above
|
||||||
|
# no_fetch_instance = false
|
|
@ -0,0 +1,10 @@
|
||||||
|
"domain","severity","reject_media","reject_reports","private_comment","public_comment","obfuscate"
|
||||||
|
"qoto.org","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"sealion.club","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"develop.gab.com","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"gab.ai","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"gab.sleeck.eu","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"gab.com","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"kiwifarms.is","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"kiwifarms.net","suspend",TRUE,TRUE,,,TRUE
|
||||||
|
"gabfed.com","suspend",TRUE,TRUE,,,TRUE
|
|
Loading…
Reference in New Issue