Merge branch 'main' into allowlist-thresholds

This commit is contained in:
Justin Warren 2023-01-22 08:27:21 +11:00
commit e59187d98f
No known key found for this signature in database
4 changed files with 72 additions and 35 deletions

View File

@ -6,9 +6,15 @@ This project uses [Semantic Versioning] and generally follows the conventions of
## [Unreleased] ## [Unreleased]
Important planned changes not yet bundled up will be listed here. - Planning to add allowlist thresholds as noted in #28
## [0.4.1] - 2023-01-15 ## [v0.4.2] - 2023-01-19
### Fixed
- Blockdata var already converted to _asdict() (8d3b9da)
## [v0.4.1] - 2023-01-15
Allowlist support. Allowlist support.
@ -44,7 +50,7 @@ Allowlist support.
- Fixed bug in _asdict() of severity level. (9817c99) - Fixed bug in _asdict() of severity level. (9817c99)
- Fix DomainBlock.id usage during __iter__() (a718af5) - Fix DomainBlock.id usage during __iter__() (a718af5)
## [0.4.0] - 2023-01-13 ## [v0.4.0] - 2023-01-13
Substantial changes to better support multiple blocklist formats Substantial changes to better support multiple blocklist formats
@ -80,9 +86,8 @@ Substantial changes to better support multiple blocklist formats
- Fixed bug in severity change detection. (e0d40b5) - Fixed bug in severity change detection. (e0d40b5)
- Fix DomainBlock.id usage during __iter__() (a718af5) - Fix DomainBlock.id usage during __iter__() (a718af5)
-
## [0.3.0] - 2023-01-11 ## [v0.3.0] - 2023-01-11
### Added ### Added
@ -94,7 +99,7 @@ Substantial changes to better support multiple blocklist formats
- Changed min Python version to v3.10. (f37ab70) - Changed min Python version to v3.10. (f37ab70)
## [0.2.1] - 2023-01-10 ## [v0.2.1] - 2023-01-10
### Added ### Added
@ -121,8 +126,9 @@ Substantial changes to better support multiple blocklist formats
[semantic versioning]: https://semver.org/spec/v2.0.0.html [semantic versioning]: https://semver.org/spec/v2.0.0.html
<!-- Versions --> <!-- Versions -->
[unreleased]: https://github.com/eigenmagic/fediblockhole/compare/v0.4.1...HEAD [unreleased]: https://github.com/eigenmagic/fediblockhole/compare/v0.4.2...HEAD
[0.4.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.1 [v0.4.2]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.2
[0.4.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.0 [v0.4.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.1
[0.3.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.3.0 [v0.4.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.4.0
[0.2.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.2.1 [v0.3.0]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.3.0
[v0.2.1]: https://github.com/eigenmagic/fediblockhole/releases/tag/v0.2.1

View File

@ -1,6 +1,6 @@
[project] [project]
name = "fediblockhole" name = "fediblockhole"
version = "0.4.1" version = "0.4.2"
description = "Federated blocklist management for Mastodon" description = "Federated blocklist management for Mastodon"
readme = "README.md" readme = "README.md"
license = {file = "LICENSE"} license = {file = "LICENSE"}

View File

@ -88,8 +88,9 @@ def sync_blocklists(conf: argparse.Namespace):
for dest in conf.blocklist_instance_destinations: for dest in conf.blocklist_instance_destinations:
domain = dest['domain'] domain = dest['domain']
token = dest['token'] token = dest['token']
scheme = dest.get('scheme', 'https')
max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence')) max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence'))
push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity) push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity, scheme)
def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict): def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict):
"""Apply allowlists """Apply allowlists
@ -164,7 +165,8 @@ def fetch_from_instances(blocklists: dict, sources: dict,
domain = item['domain'] domain = item['domain']
admin = item.get('admin', False) admin = item.get('admin', False)
token = item.get('token', None) token = item.get('token', None)
itemsrc = f"https://{domain}/api" scheme = item.get('scheme', 'https')
itemsrc = f"{scheme}://{domain}/api"
# If import fields are provided, they override the global ones passed in # If import fields are provided, they override the global ones passed in
source_import_fields = item.get('import_fields', None) source_import_fields = item.get('import_fields', None)
@ -173,7 +175,7 @@ def fetch_from_instances(blocklists: dict, sources: dict,
import_fields = IMPORT_FIELDS.extend(source_import_fields) import_fields = IMPORT_FIELDS.extend(source_import_fields)
# Add the blocklist with the domain as the source key # Add the blocklist with the domain as the source key
blocklists[itemsrc] = fetch_instance_blocklist(domain, token, admin, import_fields) blocklists[itemsrc] = fetch_instance_blocklist(domain, token, admin, import_fields, scheme)
if save_intermediate: if save_intermediate:
save_intermediate_blocklist(blocklists[itemsrc], domain, savedir, export_fields) save_intermediate_blocklist(blocklists[itemsrc], domain, savedir, export_fields)
return blocklists return blocklists
@ -358,7 +360,8 @@ def requests_headers(token: str=None):
return headers return headers
def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False, def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
import_fields: list=['domain', 'severity']) -> list[DomainBlock]: import_fields: list=['domain', 'severity'],
scheme: str='https') -> list[DomainBlock]:
"""Fetch existing block list from server """Fetch existing block list from server
@param host: The remote host to connect to. @param host: The remote host to connect to.
@ -371,12 +374,14 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
if admin: if admin:
api_path = "/api/v1/admin/domain_blocks" api_path = "/api/v1/admin/domain_blocks"
parse_format = 'json'
else: else:
api_path = "/api/v1/instance/domain_blocks" api_path = "/api/v1/instance/domain_blocks"
parse_format = 'mastodon_api_public'
headers = requests_headers(token) headers = requests_headers(token)
url = f"https://{host}{api_path}" url = f"{scheme}://{host}{api_path}"
blocklist = [] blocklist = []
link = True link = True
@ -387,7 +392,7 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
log.error(f"Cannot fetch remote blocklist: {response.content}") log.error(f"Cannot fetch remote blocklist: {response.content}")
raise ValueError("Unable to fetch domain block list: %s", response) raise ValueError("Unable to fetch domain block list: %s", response)
blocklist.extend( parse_blocklist(response.content, 'json', import_fields) ) blocklist.extend( parse_blocklist(response.content, parse_format, import_fields) )
# Parse the link header to find the next url to fetch # Parse the link header to find the next url to fetch
# This is a weird and janky way of doing pagination but # This is a weird and janky way of doing pagination but
@ -408,12 +413,12 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False,
return blocklist return blocklist
def delete_block(token: str, host: str, id: int): def delete_block(token: str, host: str, id: int, scheme: str='https'):
"""Remove a domain block""" """Remove a domain block"""
log.debug(f"Removing domain block {id} at {host}...") log.debug(f"Removing domain block {id} at {host}...")
api_path = "/api/v1/admin/domain_blocks/" api_path = "/api/v1/admin/domain_blocks/"
url = f"https://{host}{api_path}{id}" url = f"{scheme}://{host}{api_path}{id}"
response = requests.delete(url, response = requests.delete(url,
headers=requests_headers(token), headers=requests_headers(token),
@ -426,7 +431,7 @@ def delete_block(token: str, host: str, id: int):
raise ValueError(f"Something went wrong: {response.status_code}: {response.content}") raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")
def fetch_instance_follows(token: str, host: str, domain: str) -> int: def fetch_instance_follows(token: str, host: str, domain: str, scheme: str='https') -> int:
"""Fetch the followers of the target domain at the instance """Fetch the followers of the target domain at the instance
@param token: the Bearer authentication token for OAuth access @param token: the Bearer authentication token for OAuth access
@ -435,7 +440,7 @@ def fetch_instance_follows(token: str, host: str, domain: str) -> int:
@returns: int, number of local followers of remote instance accounts @returns: int, number of local followers of remote instance accounts
""" """
api_path = "/api/v1/admin/measures" api_path = "/api/v1/admin/measures"
url = f"https://{host}{api_path}" url = f"{scheme}://{host}{api_path}"
key = 'instance_follows' key = 'instance_follows'
@ -466,7 +471,8 @@ def fetch_instance_follows(token: str, host: str, domain: str) -> int:
def check_followed_severity(host: str, token: str, domain: str, def check_followed_severity(host: str, token: str, domain: str,
severity: BlockSeverity, severity: BlockSeverity,
max_followed_severity: BlockSeverity=BlockSeverity('silence')): max_followed_severity: BlockSeverity=BlockSeverity('silence'),
scheme: str='https'):
"""Check an instance to see if it has followers of a to-be-blocked instance""" """Check an instance to see if it has followers of a to-be-blocked instance"""
log.debug("Checking followed severity...") log.debug("Checking followed severity...")
@ -477,7 +483,7 @@ def check_followed_severity(host: str, token: str, domain: str,
# If the instance has accounts that follow people on the to-be-blocked domain, # If the instance has accounts that follow people on the to-be-blocked domain,
# limit the maximum severity to the configured `max_followed_severity`. # limit the maximum severity to the configured `max_followed_severity`.
log.debug("checking for instance follows...") log.debug("checking for instance follows...")
follows = fetch_instance_follows(token, host, domain) follows = fetch_instance_follows(token, host, domain, scheme)
time.sleep(API_CALL_DELAY) time.sleep(API_CALL_DELAY)
if follows > 0: if follows > 0:
log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.") log.debug(f"Instance {host} has {follows} followers of accounts at {domain}.")
@ -490,7 +496,7 @@ def is_change_needed(oldblock: dict, newblock: dict, import_fields: list):
change_needed = oldblock.compare_fields(newblock, import_fields) change_needed = oldblock.compare_fields(newblock, import_fields)
return change_needed return change_needed
def update_known_block(token: str, host: str, block: DomainBlock): def update_known_block(token: str, host: str, block: DomainBlock, scheme: str='https'):
"""Update an existing domain block with information in blockdict""" """Update an existing domain block with information in blockdict"""
api_path = "/api/v1/admin/domain_blocks/" api_path = "/api/v1/admin/domain_blocks/"
@ -502,23 +508,23 @@ def update_known_block(token: str, host: str, block: DomainBlock):
import pdb import pdb
pdb.set_trace() pdb.set_trace()
url = f"https://{host}{api_path}{id}" url = f"{scheme}://{host}{api_path}{id}"
response = requests.put(url, response = requests.put(url,
headers=requests_headers(token), headers=requests_headers(token),
json=blockdata._asdict(), json=blockdata,
timeout=REQUEST_TIMEOUT timeout=REQUEST_TIMEOUT
) )
if response.status_code != 200: if response.status_code != 200:
raise ValueError(f"Something went wrong: {response.status_code}: {response.content}") raise ValueError(f"Something went wrong: {response.status_code}: {response.content}")
def add_block(token: str, host: str, blockdata: DomainBlock): def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str='https'):
"""Block a domain on Mastodon host """Block a domain on Mastodon host
""" """
log.debug(f"Adding block entry for {blockdata.domain} at {host}...") log.debug(f"Adding block entry for {blockdata.domain} at {host}...")
api_path = "/api/v1/admin/domain_blocks" api_path = "/api/v1/admin/domain_blocks"
url = f"https://{host}{api_path}" url = f"{scheme}://{host}{api_path}"
response = requests.post(url, response = requests.post(url,
headers=requests_headers(token), headers=requests_headers(token),
@ -538,6 +544,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
dryrun: bool=False, dryrun: bool=False,
import_fields: list=['domain', 'severity'], import_fields: list=['domain', 'severity'],
max_followed_severity:BlockSeverity=BlockSeverity('silence'), max_followed_severity:BlockSeverity=BlockSeverity('silence'),
scheme: str='https',
): ):
"""Push a blocklist to a remote instance. """Push a blocklist to a remote instance.
@ -554,7 +561,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
# Force use of the admin API, and add 'id' to the list of fields # Force use of the admin API, and add 'id' to the list of fields
if 'id' not in import_fields: if 'id' not in import_fields:
import_fields.append('id') import_fields.append('id')
serverblocks = fetch_instance_blocklist(host, token, True, import_fields) serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme)
# # Convert serverblocks to a dictionary keyed by domain name # # Convert serverblocks to a dictionary keyed by domain name
knownblocks = {row.domain: row for row in serverblocks} knownblocks = {row.domain: row for row in serverblocks}
@ -575,7 +582,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
# Confirm if we really want to change the severity # Confirm if we really want to change the severity
# If we still have followers of the remote domain, we may not # If we still have followers of the remote domain, we may not
# want to go all the way to full suspend, depending on the configuration # want to go all the way to full suspend, depending on the configuration
newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity) newseverity = check_followed_severity(host, token, oldblock.domain, newblock.severity, max_followed_severity, scheme)
if newseverity != oldblock.severity: if newseverity != oldblock.severity:
newblock.severity = newseverity newblock.severity = newseverity
else: else:
@ -591,7 +598,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
log.debug(f"Block as dict: {blockdata._asdict()}") log.debug(f"Block as dict: {blockdata._asdict()}")
if not dryrun: if not dryrun:
update_known_block(token, host, blockdata) update_known_block(token, host, blockdata, scheme)
# add a pause here so we don't melt the instance # add a pause here so we don't melt the instance
time.sleep(API_CALL_DELAY) time.sleep(API_CALL_DELAY)
else: else:
@ -608,9 +615,9 @@ def push_blocklist(token: str, host: str, blocklist: list[dict],
log.debug(f"Block as dict: {newblock._asdict()}") log.debug(f"Block as dict: {newblock._asdict()}")
# Make sure the new block doesn't clobber a domain with followers # Make sure the new block doesn't clobber a domain with followers
newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity) newblock.severity = check_followed_severity(host, token, newblock.domain, newblock.severity, max_followed_severity, scheme)
if not dryrun: if not dryrun:
add_block(token, host, newblock) add_block(token, host, newblock, scheme)
# add a pause here so we don't melt the instance # add a pause here so we don't melt the instance
time.sleep(API_CALL_DELAY) time.sleep(API_CALL_DELAY)
else: else:

View File

@ -89,7 +89,7 @@ class BlocklistParserJSON(BlocklistParser):
""" """
return json.loads(blockdata) return json.loads(blockdata)
def parse_item(self, blockitem: str) -> DomainBlock: def parse_item(self, blockitem: dict) -> DomainBlock:
# Remove fields we don't want to import # Remove fields we don't want to import
origitem = blockitem.copy() origitem = blockitem.copy()
for key in origitem: for key in origitem:
@ -103,6 +103,29 @@ class BlocklistParserJSON(BlocklistParser):
block.severity = self.max_severity block.severity = self.max_severity
return block return block
class BlocklistParserMastodonAPIPublic(BlocklistParserJSON):
    """Parse blocklists fetched from the public Mastodon domain blocks API.

    The public blocklist API is slightly different to the admin one:
    items carry their comment under the key 'comment', which this parser
    imports as 'public_comment'.
    """

    def parse_item(self, blockitem: dict) -> DomainBlock:
        """Convert one public API blocklist entry into a DomainBlock.

        @param blockitem: A dict describing a single domain block.
        @returns: A DomainBlock built from the fields listed in
            self.import_fields, with severity capped at self.max_severity.
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        item = dict(blockitem)

        # The public API uses the 'comment' field to mean 'public_comment'.
        # If both keys are present, the value of 'comment' takes precedence.
        if 'comment' in item:
            item['public_comment'] = item.pop('comment')

        # Keep only the fields we want to import. Building a new dict avoids
        # deleting the same key twice when an item has both comment keys.
        wanted = {
            key: value
            for key, value in item.items()
            if key in self.import_fields
        }

        # Convert dict to a DomainBlock with the double-star operator
        # See: https://docs.python.org/3/tutorial/controlflow.html#tut-unpacking-arguments
        block = DomainBlock(**wanted)

        # Never import a block more severe than the configured maximum.
        if block.severity > self.max_severity:
            block.severity = self.max_severity
        return block
class BlocklistParserCSV(BlocklistParser): class BlocklistParserCSV(BlocklistParser):
""" Parse CSV formatted blocklists """ Parse CSV formatted blocklists
@ -200,6 +223,7 @@ def str2bool(boolstring: str) -> bool:
FORMAT_PARSERS = { FORMAT_PARSERS = {
'csv': BlocklistParserCSV, 'csv': BlocklistParserCSV,
'json': BlocklistParserJSON, 'json': BlocklistParserJSON,
'mastodon_api_public': BlocklistParserMastodonAPIPublic,
'rapidblock.csv': RapidBlockParserCSV, 'rapidblock.csv': RapidBlockParserCSV,
'rapidblock.json': RapidBlockParserJSON, 'rapidblock.json': RapidBlockParserJSON,
} }