diff --git a/CHANGELOG.md b/CHANGELOG.md index 0ac7b51..c4841a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,23 @@ This project uses [Semantic Versioning] and generally follows the conventions of ## [Unreleased] -- Planning to add allowlist thresholds as noted in #28 +## [v0.4.3] - 2023-02-13 + +### Added + +- Added Mastodon public API parser type, as requested in #33. (9fe9342) +- Added ability to set scheme when talking to instances. (9fe9342) +- Added tests of comment merging. (fb3a7ec) +- Added blocklist thresholds. (bb1d89e) +- Added logging to help debug threshold-based merging. (b67ff0c) +- Added extra documentation on configuring thresholds. (6c72af8) +- Updated documentation to reflect Mastodon v4.1.0 changes to the application scopes screen. (b92dd21) + +### Changed + +- Dropped minimum Python version to 3.6. (df3c16f) +- Don't merge comments if new comment is empty. (b8aa11e) +- Tweaked comment merging to pass tests. (fb3a7ec) ## [v0.4.2] - 2023-01-19 diff --git a/README.md b/README.md index 44a9864..a0fc832 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,8 @@ appropriate. - Provides (hopefully) sensible defaults to minimise first-time setup. - Global and fine-grained configuration options available for those complex situations that crop up sometimes. + - Allowlists to override blocks in blocklists, ensuring you never block instances you want to keep. + - Blocklist thresholds, so you can block only when an instance shows up in multiple blocklists. ## Installing @@ -79,17 +81,16 @@ admin to add a new Application at `https:///settings/applications/` and then tell you the access token. -The application needs the `admin:read:domain_blocks` OAuth scope, but -unfortunately this scope isn't available in the current application screen -(v4.0.2 of Mastodon at time of writing, but this has been fixed in the main -branch). +The application needs the `admin:read:domain_blocks` OAuth scope. You can allow +full `admin:read` access, but be aware that this authorizes someone to read all +the data in the instance. That's asking a lot of a remote instance admin who +just wants to share domain_blocks with you. -You can allow full `admin:read` access, but be aware that this authorizes -someone to read all the data in the instance. That's asking a lot of a remote -instance admin who just wants to share domain_blocks with you. +The `admin:read:domain_blocks` scope is available as of Mastodon v4.1.0, but for +earlier versions admins will need to use the manual method described below. -For now, you can ask the instance admin to update the scope in the database -directly like this: +You can update the scope for your application in the database directly like +this: ``` UPDATE oauth_applications as app @@ -134,8 +135,12 @@ chmod o-r ``` You can also grant full `admin:write` scope to the application, but if you'd -prefer to keep things more tightly secured you'll need to use SQL to set the -scopes in the database and then regenerate the token: +prefer to keep things more tightly secured, limit the scope to +`admin:read:domain_blocks`. + +Again, this scope is only available in the application config screen as of +Mastodon v4.1.0. 
If your instance is on an earlier version, you'll need to use +SQL to set the scopes in the database and then regenerate the token: ``` UPDATE oauth_applications as app @@ -192,6 +197,7 @@ Supported formats are currently: - Comma-Separated Values (CSV) - JSON + - Mastodon v4.1 flavoured CSV - RapidBlock CSV - RapidBlock JSON @@ -209,6 +215,17 @@ A CSV format blocklist must contain a header row with at least a `domain` and `s Optional fields, as listed above, may also be included. +#### Mastodon v4.1 CSV format + +As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their +infinite wisdom, the Mastodon devs decided that field names should begin with a +`#` character in the header, unlike the field names in the JSON output via the +API… or in pretty much any other CSV file anywhere else. + +Setting the format to `mastodon_csv` will strip off the `#` character when +parsing, and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any +other CSV formatted blocklist. + #### JSON format JSON is also supported. It uses the same format as the JSON returned from the Mastodon API. diff --git a/etc/sample.fediblockhole.conf.toml b/etc/sample.fediblockhole.conf.toml index e377e97..bd93663 100644 --- a/etc/sample.fediblockhole.conf.toml +++ b/etc/sample.fediblockhole.conf.toml @@ -56,6 +56,24 @@ blocklist_instance_destinations = [ # The 'min' mergeplan will use the lightest severity block found for a domain. # mergeplan = 'max' +## Optional threshold-based merging. +# Only merge in domain blocks if the domain is mentioned in +# at least `threshold` blocklists. +# `merge_threshold` is an integer, with a default value of 0. +# The `merge_threshold_type` can be `count` or `pct`. +# If `count` type is selected, the threshold is reached when the domain +# is mentioned in at least `merge_threshold` blocklists. The default value +# of 0 means that every block in every list will be merged in. +# If `pct` type is selected, `merge_threshold` is interpreted as a percentage, +# i.e. if `merge_threshold` = 20, blocks will only be merged in if the domain +# is present in at least 20% of blocklists. +# The percentage is calculated as number_of_mentions / total_number_of_blocklists. +# The percentage method is more flexible, but also more complicated, so take care +# when using it. 
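+# A worked example (illustrative numbers only, not a recommendation): with +# three blocklist sources configured, setting +# merge_threshold_type = 'pct' +# merge_threshold = 34 +# merges a block only when its domain appears in at least two of the three +# lists, since 2/3 = 66.7% meets the 34% threshold while 1/3 = 33.3% does not.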
+# +# merge_threshold_type = 'count' +# merge_threshold = 0 + ## Set which fields we import ## 'domain' and 'severity' are always imported, these are additional ## diff --git a/pyproject.toml b/pyproject.toml index 4fddc2b..2736623 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,10 +1,10 @@ [project] name = "fediblockhole" -version = "0.4.2" +version = "0.4.3" description = "Federated blocklist management for Mastodon" readme = "README.md" license = {file = "LICENSE"} -requires-python = ">=3.10" +requires-python = ">=3.6" keywords = ["mastodon", "fediblock"] authors = [ {name = "Justin Warren"}, {email = "justin@eigenmagic.com"} @@ -17,6 +17,10 @@ classifiers = [ "Natural Language :: English", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.6", ] dependencies = [ "requests", diff --git a/samples/demo-allowlist-01.csv b/samples/demo-allowlist-01.csv index 6ee7744..665ff6a 100644 --- a/samples/demo-allowlist-01.csv +++ b/samples/demo-allowlist-01.csv @@ -1,3 +1,4 @@ "domain","severity","private_comment","public_comment","reject_media","reject_reports","obfuscate" -"eigenmagic.net","noop","Never block me","Only the domain field matters",False,False,False -"example.org","noop","Never block me either","The severity is ignored as are all other fields",False,False,False +"eigenmagic.net","noop","Never block me","Only the domain field matters for allowlists",False,False,False +"example.org","noop","Never block me either","The severity is ignored in allowlists as are all other fields",False,False,False +"demo01.example.org","noop","Never block me either","But you can use them to leave yourself or others notes on why the item is here",False,False,False diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index 67617d6..8a0823c 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -11,7 +11,7 @@ import os.path import sys import urllib.request as urlr -from .blocklist_parser import parse_blocklist +from .blocklists import Blocklist, parse_blocklist from .const import DomainBlock, BlockSeverity from importlib.metadata import version @@ -59,19 +59,19 @@ def sync_blocklists(conf: argparse.Namespace): # Add extra export fields if defined in config export_fields.extend(conf.export_fields) - blocklists = {} + blocklists = [] # Fetch blocklists from URLs if not conf.no_fetch_url: - blocklists = fetch_from_urls(blocklists, conf.blocklist_url_sources, - import_fields, conf.save_intermediate, conf.savedir, export_fields) + blocklists.extend(fetch_from_urls(conf.blocklist_url_sources, + import_fields, conf.save_intermediate, conf.savedir, export_fields)) # Fetch blocklists from remote instances if not conf.no_fetch_instance: - blocklists = fetch_from_instances(blocklists, conf.blocklist_instance_sources, - import_fields, conf.save_intermediate, conf.savedir, export_fields) + blocklists.extend(fetch_from_instances(conf.blocklist_instance_sources, + import_fields, conf.save_intermediate, conf.savedir, export_fields)) # Merge blocklists into an update dict - merged = merge_blocklists(blocklists, conf.mergeplan) + merged = merge_blocklists(blocklists, conf.mergeplan, conf.merge_threshold, conf.merge_threshold_type) # Remove items listed in allowlists, if any allowlists = fetch_allowlists(conf) @@ -80,48 +80,48 @@ def sync_blocklists(conf: argparse.Namespace): # 
Save the final mergelist, if requested if conf.blocklist_savefile: log.info(f"Saving merged blocklist to {conf.blocklist_savefile}") - save_blocklist_to_file(merged.values(), conf.blocklist_savefile, export_fields) + save_blocklist_to_file(merged, conf.blocklist_savefile, export_fields) # Push the blocklist to destination instances if not conf.no_push_instance: log.info("Pushing domain blocks to instances...") for dest in conf.blocklist_instance_destinations: - domain = dest['domain'] + target = dest['domain'] token = dest['token'] scheme = dest.get('scheme', 'https') max_followed_severity = BlockSeverity(dest.get('max_followed_severity', 'silence')) - push_blocklist(token, domain, merged.values(), conf.dryrun, import_fields, max_followed_severity, scheme) + push_blocklist(token, target, merged, conf.dryrun, import_fields, max_followed_severity, scheme) -def apply_allowlists(merged: dict, conf: argparse.Namespace, allowlists: dict): +def apply_allowlists(merged: Blocklist, conf: argparse.Namespace, allowlists: dict): """Apply allowlists """ # Apply allows specified on the commandline for domain in conf.allow_domains: log.info(f"'{domain}' allowed by commandline, removing any blocks...") - if domain in merged: - del merged[domain] + if domain in merged.blocks: + del merged.blocks[domain] # Apply allows from URLs lists log.info("Removing domains from URL allowlists...") - for key, alist in allowlists.items(): - log.debug(f"Processing allows from '{key}'...") - for allowed in alist: + for alist in allowlists: + log.debug(f"Processing allows from '{alist.origin}'...") + for allowed in alist.blocks.values(): domain = allowed.domain log.debug(f"Removing allowlisted domain '{domain}' from merged list.") - if domain in merged: - del merged[domain] + if domain in merged.blocks: + del merged.blocks[domain] return merged -def fetch_allowlists(conf: argparse.Namespace) -> dict: +def fetch_allowlists(conf: argparse.Namespace) -> Blocklist: """Fetch allowlists from any configured URL sources. """ if conf.allowlist_url_sources: - allowlists = fetch_from_urls({}, conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS) + allowlists = fetch_from_urls(conf.allowlist_url_sources, ALLOWLIST_IMPORT_FIELDS, conf.save_intermediate, conf.savedir) return allowlists - return {} + return Blocklist() -def fetch_from_urls(blocklists: dict, url_sources: dict, +def fetch_from_urls(url_sources: dict, import_fields: list=IMPORT_FIELDS, save_intermediate: bool=False, savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict: @@ -131,7 +131,7 @@ def fetch_from_urls(blocklists: dict, url_sources: dict, @returns: A list of Blocklist objects parsed from the fetched data """ log.info("Fetching domain blocks from URLs...") - + blocklists = [] for item in url_sources: url = item['url'] # If import fields are provided, they override the global ones passed in @@ -144,14 +144,14 @@ def fetch_from_urls(blocklists: dict, url_sources: dict, listformat = item.get('format', 'csv') with urlr.urlopen(url) as fp: rawdata = fp.read(URL_BLOCKLIST_MAXSIZE).decode('utf-8') - blocklists[url] = parse_blocklist(rawdata, listformat, import_fields, max_severity) - - if save_intermediate: - save_intermediate_blocklist(blocklists[url], url, savedir, export_fields) + bl = parse_blocklist(rawdata, url, listformat, import_fields, max_severity) + blocklists.append(bl) + if save_intermediate: + save_intermediate_blocklist(bl, savedir, export_fields) return blocklists -def fetch_from_instances(blocklists: dict, sources: dict, +def fetch_from_instances(sources: dict, + import_fields: list=IMPORT_FIELDS, 
save_intermediate: bool=False, savedir: str=None, export_fields: list=EXPORT_FIELDS) -> dict: @@ -161,12 +161,13 @@ def fetch_from_instances(blocklists: dict, sources: dict, @returns: A list of Blocklist objects fetched from the instances """ log.info("Fetching domain blocks from instances...") + blocklists = [] for item in sources: domain = item['domain'] admin = item.get('admin', False) token = item.get('token', None) scheme = item.get('scheme', 'https') - itemsrc = f"{scheme}://{domain}/api" # If import fields are provided, they override the global ones passed in source_import_fields = item.get('import_fields', None) if source_import_fields: # Ensure we always use the default fields import_fields = IMPORT_FIELDS.extend(source_import_fields) - # Add the blocklist with the domain as the source key - blocklists[itemsrc] = fetch_instance_blocklist(domain, token, admin, import_fields, scheme) + bl = fetch_instance_blocklist(domain, token, admin, import_fields, scheme) + blocklists.append(bl) if save_intermediate: - save_intermediate_blocklist(blocklists[itemsrc], domain, savedir, export_fields) + save_intermediate_blocklist(bl, savedir, export_fields) return blocklists -def merge_blocklists(blocklists: dict, mergeplan: str='max') -> dict: +def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max', + threshold: int=0, + threshold_type: str='count') -> Blocklist: """Merge fetched remote blocklists into a bulk update @param blocklists: A list of Blocklist objects @param mergeplan: An optional method of merging overlapping block definitions 'max' (the default) uses the highest severity block found 'min' uses the lowest severity block found + @param threshold: An integer used in the threshold mechanism. + If a domain is not present in at least this many blocklists (or this + percentage of them), it will not be merged into the final list. + @param threshold_type: choice of ['count', 'pct'] + If `count`, the threshold is met if the block is present in `threshold` + or more blocklists. + If `pct`, the threshold is met if the block is present in at least + `threshold` percent of blocklists, i.e. 100 * count_of_mentions / number_of_blocklists >= threshold. @returns: A Blocklist of merged DomainBlocks, keyed by domain """ - merged = {} + merged = Blocklist('fediblockhole.merge_blocklists') - for key, blist in blocklists.items(): - log.debug(f"processing blocklist from: {key} ...") - for newblock in blist: - domain = newblock.domain - # If the domain has two asterisks in it, it's obfuscated - # and we can't really use it, so skip it and do the next one - if '*' in domain: - log.debug(f"Domain '{domain}' is obfuscated. Skipping it.") + num_blocklists = len(blocklists) + + # Create a domain keyed list of blocks for each domain + domain_blocks = {} + + for bl in blocklists: + for block in bl.values(): + if '*' in block.domain: + log.debug(f"Domain '{block.domain}' is obfuscated. Skipping it.") continue - - elif domain in merged: - log.debug(f"Overlapping block for domain {domain}. Merging...")
Merging...") - blockdata = apply_mergeplan(merged[domain], newblock, mergeplan) - + elif block.domain in domain_blocks: + domain_blocks[block.domain].append(block) else: - # New block - blockdata = newblock + domain_blocks[block.domain] = [block,] + + # Only merge items if `threshold` is met or exceeded + for domain in domain_blocks: + if threshold_type == 'count': + domain_threshold_level = len(domain_blocks[domain]) + elif threshold_type == 'pct': + domain_threshold_level = len(domain_blocks[domain]) / num_blocklists * 100 + # log.debug(f"domain threshold level: {domain_threshold_level}") + else: + raise ValueError(f"Unsupported threshold type '{threshold_type}'. Supported values are: 'count', 'pct'") + + log.debug(f"Checking if {domain_threshold_level} >= {threshold} for {domain}") + if domain_threshold_level >= threshold: + # Add first block in the list to merged + block = domain_blocks[domain][0] + log.debug(f"Yes. Merging block: {block}") + + # Merge the others with this record + for newblock in domain_blocks[domain][1:]: + block = apply_mergeplan(block, newblock, mergeplan) + merged.blocks[block.domain] = block - # end if - log.debug(f"blockdata is: {blockdata}") - merged[domain] = blockdata - # end for return merged def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str='max') -> dict: @@ -239,10 +264,10 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str # How do we override an earlier block definition? if mergeplan in ['max', None]: # Use the highest block level found (the default) - log.debug(f"Using 'max' mergeplan.") + # log.debug(f"Using 'max' mergeplan.") if newblock.severity > oldblock.severity: - log.debug(f"New block severity is higher. Using that.") + # log.debug(f"New block severity is higher. Using that.") blockdata['severity'] = newblock.severity # For 'reject_media', 'reject_reports', and 'obfuscate' if @@ -271,7 +296,7 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str else: raise NotImplementedError(f"Mergeplan '{mergeplan}' not implemented.") - log.debug(f"Block severity set to {blockdata['severity']}") + # log.debug(f"Block severity set to {blockdata['severity']}") return DomainBlock(**blockdata) @@ -357,17 +382,19 @@ def fetch_instance_blocklist(host: str, token: str=None, admin: bool=False, url = f"{scheme}://{host}{api_path}" - blocklist = [] + blockdata = [] link = True - while link: response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT) if response.status_code != 200: log.error(f"Cannot fetch remote blocklist: {response.content}") raise ValueError("Unable to fetch domain block list: %s", response) - blocklist.extend( parse_blocklist(response.content, parse_format, import_fields) ) - + # Each block of returned data is a JSON list of dicts + # so we parse them and append them to the fetched list + # of JSON data we need to parse. 
+ + blockdata.extend(json.loads(response.content.decode('utf-8'))) # Parse the link header to find the next url to fetch # This is a weird and janky way of doing pagination but # hey nothing we can do about it we just have to deal @@ -385,6 +412,8 @@ urlstring, rel = next.split('; ') url = urlstring.strip('<').rstrip('>') + blocklist = parse_blocklist(blockdata, url, parse_format, import_fields) + return blocklist def delete_block(token: str, host: str, id: int, scheme: str='https'): @@ -474,13 +503,9 @@ def update_known_block(token: str, host: str, block: DomainBlock, scheme: str='h """Update an existing domain block with information in blockdict""" api_path = "/api/v1/admin/domain_blocks/" - try: - id = block.id - blockdata = block._asdict() - del blockdata['id'] - except KeyError: - import pdb - pdb.set_trace() + id = block.id + blockdata = block._asdict() + del blockdata['id'] url = f"{scheme}://{host}{api_path}{id}" @@ -514,7 +539,7 @@ def add_block(token: str, host: str, blockdata: DomainBlock, scheme: str='https' raise ValueError(f"Something went wrong: {response.status_code}: {response.content}") -def push_blocklist(token: str, host: str, blocklist: list[dict], +def push_blocklist(token: str, host: str, blocklist: Blocklist, dryrun: bool=False, import_fields: list=['domain', 'severity'], max_followed_severity:BlockSeverity=BlockSeverity('silence'), @@ -522,8 +547,7 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], ): """Push a blocklist to a remote instance. - Merging the blocklist with the existing list the instance has, - updating existing entries if they exist. + Updates existing entries if they exist, creates new blocks if they don't. @param token: The Bearer token for OAUTH API authentication @param host: The instance host, FQDN or IP @@ -538,15 +562,16 @@ def push_blocklist(token: str, host: str, blocklist: list[dict], serverblocks = fetch_instance_blocklist(host, token, True, import_fields, scheme) # # serverblocks is a Blocklist, already keyed by domain name - knownblocks = {row.domain: row for row in serverblocks} - for newblock in blocklist: + for newblock in blocklist.values(): log.debug(f"Processing block: {newblock}") - oldblock = knownblocks.get(newblock.domain, None) - if oldblock: + if newblock.domain in serverblocks: log.debug(f"Block already exists for {newblock.domain}, checking for differences...") + oldblock = serverblocks[newblock.domain] + change_needed = is_change_needed(oldblock, newblock, import_fields) # Is the severity changing? 
@@ -605,15 +630,14 @@ def load_config(configfile: str): conf = toml.load(configfile) return conf -def save_intermediate_blocklist( - blocklist: list[dict], source: str, - filedir: str, +def save_intermediate_blocklist(blocklist: Blocklist, filedir: str, export_fields: list=['domain','severity']): """Save a local copy of a blocklist we've downloaded """ # Invent a filename based on the remote source # If the source was a URL, convert it to something less messy # If the source was a remote domain, just use the name of the domain + source = blocklist.origin log.debug(f"Saving intermediate blocklist from {source}") source = source.replace('/','-') filename = f"{source}.csv" @@ -621,7 +645,7 @@ save_blocklist_to_file(blocklist, filepath, export_fields) def save_blocklist_to_file( - blocklist: list[DomainBlock], + blocklist: Blocklist, filepath: str, export_fields: list=['domain','severity']): """Save a blocklist we've downloaded from a remote source @@ -631,18 +655,22 @@ @param export_fields: Which fields to include in the export. """ try: - blocklist = sorted(blocklist, key=lambda x: x.domain) + sorted_list = sorted(blocklist.blocks.items()) except KeyError: log.error("Field 'domain' not found in blocklist.") log.debug(f"blocklist is: {blocklist}") + except AttributeError as e: + log.error(f"Unable to sort blocklist: {e}") + raise log.debug(f"export fields: {export_fields}") with open(filepath, "w") as fp: writer = csv.DictWriter(fp, export_fields, extrasaction='ignore') writer.writeheader() - for item in blocklist: - writer.writerow(item._asdict()) + for key, value in sorted_list: + writer.writerow(value._asdict()) def augment_args(args, tomldata: str=None): """Augment commandline arguments with config file parameters @@ -682,6 +710,12 @@ if not args.mergeplan: args.mergeplan = conf.get('mergeplan', 'max') + if not args.merge_threshold: + args.merge_threshold = conf.get('merge_threshold', 0) + + if not args.merge_threshold_type: + args.merge_threshold_type = conf.get('merge_threshold_type', 'count') + args.blocklist_url_sources = conf.get('blocklist_url_sources', []) args.blocklist_instance_sources = conf.get('blocklist_instance_sources', []) args.allowlist_url_sources = conf.get('allowlist_url_sources', []) @@ -703,6 +737,8 @@ def setup_argparse(): ap.add_argument('-S', '--save-intermediate', dest="save_intermediate", action='store_true', help="Save intermediate blocklists we fetch to local files.") ap.add_argument('-D', '--savedir', dest="savedir", help="Directory path to save intermediate lists.") ap.add_argument('-m', '--mergeplan', choices=['min', 'max'], help="Set mergeplan.") + ap.add_argument('--merge-threshold', type=int, help="Merge threshold value.") + ap.add_argument('--merge-threshold-type', choices=['count', 'pct'], help="Type of merge threshold to use.") ap.add_argument('-I', '--import-field', dest='import_fields', action='append', help="Extra blocklist fields to import.") ap.add_argument('-E', '--export-field', dest='export_fields', action='append', help="Extra blocklist fields to export.") diff --git a/src/fediblockhole/blocklist_parser.py b/src/fediblockhole/blocklists.py similarity index 80% rename from src/fediblockhole/blocklist_parser.py rename to src/fediblockhole/blocklists.py index 135afa6..1edf886 100644 --- a/src/fediblockhole/blocklist_parser.py +++ b/src/fediblockhole/blocklists.py @@ -1,19 +1,47 @@ """Parse various 
blocklist data formats """ -from typing import Iterable -from .const import DomainBlock, BlockSeverity - import csv import json +from typing import Iterable +from dataclasses import dataclass, field + +from .const import DomainBlock, BlockSeverity import logging log = logging.getLogger('fediblockhole') +@dataclass +class Blocklist: + """ A Blocklist object + + A Blocklist is a collection of DomainBlocks from an origin, keyed by domain + """ + origin: str = None + blocks: dict[str, DomainBlock] = field(default_factory=dict) + + def __len__(self): + return len(self.blocks) + + def __class_getitem__(cls, item): + return dict[str, DomainBlock] + + def __getitem__(self, item): + return self.blocks[item] + + def __iter__(self): + return self.blocks.__iter__() + + def items(self): + return self.blocks.items() + + def values(self): + return self.blocks.values() + class BlocklistParser(object): """ Base class for parsing blocklists """ - preparse = False + do_preparse = False def __init__(self, import_fields: list=['domain', 'severity'], max_severity: str='suspend'): @@ -30,17 +58,18 @@ """ raise NotImplementedError - def parse_blocklist(self, blockdata) -> dict[DomainBlock]: + def parse_blocklist(self, blockdata, origin: str=None) -> Blocklist: """Parse an iterable of blocklist items @param blocklist: An Iterable of blocklist items @returns: A Blocklist of DomainBlocks, keyed by domain """ - if self.preparse: + if self.do_preparse: blockdata = self.preparse(blockdata) - parsed_list = [] + parsed_list = Blocklist(origin) for blockitem in blockdata: - parsed_list.append(self.parse_item(blockitem)) + block = self.parse_item(blockitem) + parsed_list.blocks[block.domain] = block return parsed_list def parse_item(self, blockitem) -> DomainBlock: @@ -53,12 +82,13 @@ class BlocklistParserJSON(BlocklistParser): """Parse a JSON formatted blocklist""" - preparse = True + do_preparse = True def preparse(self, blockdata) -> Iterable: - """Parse the blockdata as JSON - """ - return json.loads(blockdata) + """Parse the blockdata as JSON if needed""" + if isinstance(blockdata, str): + return json.loads(blockdata) + return blockdata def parse_item(self, blockitem: dict) -> DomainBlock: # Remove fields we don't want to import @@ -102,7 +132,7 @@ The parser expects the CSV data to include a header with the field names. """ - preparse = True + do_preparse = True def preparse(self, blockdata) -> Iterable: """Use a csv.DictReader to create an iterable from the blockdata @@ -130,6 +160,24 @@ block.severity = self.max_severity return block +class BlocklistParserMastodonCSV(BlocklistParserCSV): + """ Parse Mastodon CSV formatted blocklists + + The Mastodon v4.1.x domain block CSV export prefixes its + field names with a '#' character because… reasons? 
+ """ + do_preparse = True + + def parse_item(self, blockitem: dict) -> DomainBlock: + """Build a new blockitem dict with new un-#ed keys + """ + newdict = {} + for key in blockitem: + newkey = key.lstrip('#') + newdict[newkey] = blockitem[key] + + return super().parse_item(newdict) + class RapidBlockParserCSV(BlocklistParserCSV): """ Parse RapidBlock CSV blocklists @@ -193,6 +241,7 @@ def str2bool(boolstring: str) -> bool: FORMAT_PARSERS = { 'csv': BlocklistParserCSV, + 'mastodon_csv': BlocklistParserMastodonCSV, 'json': BlocklistParserJSON, 'mastodon_api_public': BlocklistParserMastodonAPIPublic, 'rapidblock.csv': RapidBlockParserCSV, @@ -202,11 +251,13 @@ FORMAT_PARSERS = { # helper function to select the appropriate Parser def parse_blocklist( blockdata, + origin, format="csv", import_fields: list=['domain', 'severity'], max_severity: str='suspend'): """Parse a blocklist in the given format """ - parser = FORMAT_PARSERS[format](import_fields, max_severity) log.debug(f"parsing {format} blocklist with import_fields: {import_fields}...") - return parser.parse_blocklist(blockdata) \ No newline at end of file + + parser = FORMAT_PARSERS[format](import_fields, max_severity) + return parser.parse_blocklist(blockdata, origin) \ No newline at end of file diff --git a/tests/test_allowlist.py b/tests/test_allowlist.py index 902b301..ddd53b9 100644 --- a/tests/test_allowlist.py +++ b/tests/test_allowlist.py @@ -4,6 +4,7 @@ import pytest from util import shim_argparse from fediblockhole.const import DomainBlock +from fediblockhole.blocklists import Blocklist from fediblockhole import fetch_allowlists, apply_allowlists def test_cmdline_allow_removes_domain(): @@ -11,17 +12,13 @@ def test_cmdline_allow_removes_domain(): """ conf = shim_argparse(['-A', 'removeme.org']) - merged = { + merged = Blocklist('test_allowlist.merged', { 'example.org': DomainBlock('example.org'), 'example2.org': DomainBlock('example2.org'), 'removeme.org': DomainBlock('removeme.org'), 'keepblockingme.org': DomainBlock('keepblockingme.org'), - } + }) - # allowlists = { - # 'testlist': [ DomainBlock('removeme.org', 'noop'), ] - # } - merged = apply_allowlists(merged, conf, {}) with pytest.raises(KeyError): @@ -32,16 +29,18 @@ def test_allowlist_removes_domain(): """ conf = shim_argparse() - merged = { + merged = Blocklist('test_allowlist.merged', { 'example.org': DomainBlock('example.org'), 'example2.org': DomainBlock('example2.org'), 'removeme.org': DomainBlock('removeme.org'), 'keepblockingme.org': DomainBlock('keepblockingme.org'), - } + }) - allowlists = { - 'testlist': [ DomainBlock('removeme.org', 'noop'), ] - } + allowlists = [ + Blocklist('test_allowlist', { + 'removeme.org': DomainBlock('removeme.org', 'noop'), + }) + ] merged = apply_allowlists(merged, conf, allowlists) @@ -53,19 +52,19 @@ def test_allowlist_removes_tld(): """ conf = shim_argparse() - merged = { + merged = Blocklist('test_allowlist.merged', { '.cf': DomainBlock('.cf'), 'example.org': DomainBlock('example.org'), '.tk': DomainBlock('.tk'), 'keepblockingme.org': DomainBlock('keepblockingme.org'), - } + }) - allowlists = { - 'list1': [ - DomainBlock('.cf', 'noop'), - DomainBlock('.tk', 'noop'), - ] - } + allowlists = [ + Blocklist('test_allowlist.list1', { + '.cf': DomainBlock('.cf', 'noop'), + '.tk': DomainBlock('.tk', 'noop'), + }) + ] merged = apply_allowlists(merged, conf, allowlists) diff --git a/tests/test_configfile.py b/tests/test_configfile.py index 4b2c1e7..9e31c9d 100644 --- a/tests/test_configfile.py +++ b/tests/test_configfile.py @@ -49,3 
+49,31 @@ allowlist_url_sources = [ { url='file:///path/to/allowlist', format='csv'} ] 'url': 'file:///path/to/allowlist', 'format': 'csv', }] + +def test_set_merge_threshold_default(): + tomldata = """ +""" + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'max' + assert args.merge_threshold_type == 'count' + +def test_set_merge_threshold_count(): + tomldata = """# Add a merge threshold +merge_threshold_type = 'count' +merge_threshold = 2 +""" + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'max' + assert args.merge_threshold_type == 'count' + assert args.merge_threshold == 2 + +def test_set_merge_threshold_pct(): + tomldata = """# Add a merge threshold +merge_threshold_type = 'pct' +merge_threshold = 35 +""" + args = shim_argparse([], tomldata) + + assert args.mergeplan == 'max' + assert args.merge_threshold_type == 'pct' + assert args.merge_threshold == 35 diff --git a/tests/test_merge_thresholds.py b/tests/test_merge_thresholds.py new file mode 100644 index 0000000..4cde03e --- /dev/null +++ b/tests/test_merge_thresholds.py @@ -0,0 +1,151 @@ +"""Test merge with thresholds +""" + +from fediblockhole.blocklists import Blocklist, parse_blocklist +from fediblockhole import merge_blocklists, apply_mergeplan + +from fediblockhole.const import SeverityLevel, DomainBlock + +datafile01 = "data-suspends-01.csv" +datafile02 = "data-silences-01.csv" +datafile03 = "data-noop-01.csv" + +import_fields = [ + 'domain', + 'severity', + 'public_comment', + 'private_comment', + 'reject_media', + 'reject_reports', + 'obfuscate' +] + +def load_test_blocklist_data(datafiles): + + blocklists = [] + + for df in datafiles: + with open(df) as fp: + data = fp.read() + bl = parse_blocklist(data, df, 'csv', import_fields) + blocklists.append(bl) + + return blocklists + +def test_mergeplan_count_2(): + """Only merge a block if present in 2 or more lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3], 'max', threshold=2) + + assert 'onemention.example.org' not in ml + assert 'twomention.example.org' in ml + assert 'threemention.example.org' in ml + +def test_mergeplan_count_3(): + """Only merge a block if present in 3 or more lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 
'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3], 'max', threshold=3) + + assert 'onemention.example.org' not in ml + assert 'twomention.example.org' not in ml + assert 'threemention.example.org' in ml + +def test_mergeplan_pct_30(): + """Only merge a block if present in at least 30% of lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_4 = Blocklist('test4', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3, bl_4], 'max', threshold=30, threshold_type='pct') + + assert 'onemention.example.org' not in ml + assert 'twomention.example.org' in ml + assert 'threemention.example.org' in ml + assert 'fourmention.example.org' in ml + +def test_mergeplan_pct_55(): + """Only merge a block if present in at least 55% of lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_4 = Blocklist('test4', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3, bl_4], 'max', threshold=55, threshold_type='pct') + + assert 
'onemention.example.org' not in ml + assert 'twomention.example.org' not in ml + assert 'threemention.example.org' in ml + assert 'fourmention.example.org' in ml \ No newline at end of file diff --git a/tests/test_mergeplan.py b/tests/test_mergeplan.py index 55f3914..42d2816 100644 --- a/tests/test_mergeplan.py +++ b/tests/test_mergeplan.py @@ -1,7 +1,7 @@ """Various mergeplan tests """ -from fediblockhole.blocklist_parser import parse_blocklist +from fediblockhole.blocklists import parse_blocklist from fediblockhole import merge_blocklists, merge_comments, apply_mergeplan from fediblockhole.const import SeverityLevel, DomainBlock @@ -22,20 +22,19 @@ import_fields = [ def load_test_blocklist_data(datafiles): - blocklists = {} + blocklists = [] for df in datafiles: with open(df) as fp: data = fp.read() - bl = parse_blocklist(data, 'csv', import_fields) - blocklists[df] = bl + bl = parse_blocklist(data, df, 'csv', import_fields) + blocklists.append(bl) return blocklists def test_mergeplan_max(): """Test 'max' mergeplan""" blocklists = load_test_blocklist_data([datafile01, datafile02]) - bl = merge_blocklists(blocklists, 'max') assert len(bl) == 13 diff --git a/tests/test_parser_csv.py b/tests/test_parser_csv.py index c817e16..703fe95 100644 --- a/tests/test_parser_csv.py +++ b/tests/test_parser_csv.py @@ -1,22 +1,24 @@ """Tests of the CSV parsing """ -from fediblockhole.blocklist_parser import BlocklistParserCSV, parse_blocklist -from fediblockhole.const import DomainBlock, BlockSeverity, SeverityLevel +from fediblockhole.blocklists import BlocklistParserCSV, parse_blocklist +from fediblockhole.const import SeverityLevel def test_single_line(): csvdata = "example.org" + origin = "csvfile" parser = BlocklistParserCSV() - bl = parser.parse_blocklist(csvdata) + bl = parser.parse_blocklist(csvdata, origin) assert len(bl) == 0 def test_header_only(): csvdata = "domain,severity,public_comment" + origin = "csvfile" parser = BlocklistParserCSV() - bl = parser.parse_blocklist(csvdata) + bl = parser.parse_blocklist(csvdata, origin) assert len(bl) == 0 def test_2_blocks(): @@ -24,12 +26,13 @@ def test_2_blocks(): example.org,silence example2.org,suspend """ + origin = "csvfile" parser = BlocklistParserCSV() - bl = parser.parse_blocklist(csvdata) + bl = parser.parse_blocklist(csvdata, origin) assert len(bl) == 2 - assert bl[0].domain == 'example.org' + assert 'example.org' in bl def test_4_blocks(): csvdata = """domain,severity,public_comment @@ -38,20 +41,21 @@ example2.org,suspend,"test 2" example3.org,noop,"test 3" example4.org,suspend,"test 4" """ + origin = "csvfile" parser = BlocklistParserCSV() - bl = parser.parse_blocklist(csvdata) + bl = parser.parse_blocklist(csvdata, origin) assert len(bl) == 4 - assert bl[0].domain == 'example.org' - assert bl[1].domain == 'example2.org' - assert bl[2].domain == 'example3.org' - assert bl[3].domain == 'example4.org' + assert 'example.org' in bl + assert 'example2.org' in bl + assert 'example3.org' in bl + assert 'example4.org' in bl - assert bl[0].severity.level == SeverityLevel.SILENCE - assert bl[1].severity.level == SeverityLevel.SUSPEND - assert bl[2].severity.level == SeverityLevel.NONE - assert bl[3].severity.level == SeverityLevel.SUSPEND + assert bl['example.org'].severity.level == SeverityLevel.SILENCE + assert bl['example2.org'].severity.level == SeverityLevel.SUSPEND + assert bl['example3.org'].severity.level == SeverityLevel.NONE + assert bl['example4.org'].severity.level == SeverityLevel.SUSPEND def test_ignore_comments(): csvdata = 
"""domain,severity,public_comment,private_comment @@ -60,18 +64,18 @@ example2.org,suspend,"test 2","ignote me also" example3.org,noop,"test 3","and me" example4.org,suspend,"test 4","also me" """ + origin = "csvfile" parser = BlocklistParserCSV() - bl = parser.parse_blocklist(csvdata) + bl = parser.parse_blocklist(csvdata, origin) assert len(bl) == 4 - assert bl[0].domain == 'example.org' - assert bl[1].domain == 'example2.org' - assert bl[2].domain == 'example3.org' - assert bl[3].domain == 'example4.org' + assert 'example.org' in bl + assert 'example2.org' in bl + assert 'example3.org' in bl + assert 'example4.org' in bl - assert bl[0].public_comment == '' - assert bl[0].private_comment == '' - - assert bl[2].public_comment == '' - assert bl[2].private_comment == '' \ No newline at end of file + assert bl['example.org'].public_comment == '' + assert bl['example.org'].private_comment == '' + assert bl['example3.org'].public_comment == '' + assert bl['example4.org'].private_comment == '' \ No newline at end of file diff --git a/tests/test_parser_csv_mastodon.py b/tests/test_parser_csv_mastodon.py new file mode 100644 index 0000000..6e85c71 --- /dev/null +++ b/tests/test_parser_csv_mastodon.py @@ -0,0 +1,81 @@ +"""Tests of the CSV parsing +""" + +from fediblockhole.blocklists import BlocklistParserMastodonCSV +from fediblockhole.const import SeverityLevel + + +def test_single_line(): + csvdata = "example.org" + origin = "csvfile" + + parser = BlocklistParserMastodonCSV() + bl = parser.parse_blocklist(csvdata, origin) + assert len(bl) == 0 + +def test_header_only(): + csvdata = "#domain,#severity,#public_comment" + origin = "csvfile" + + parser = BlocklistParserMastodonCSV() + bl = parser.parse_blocklist(csvdata, origin) + assert len(bl) == 0 + +def test_2_blocks(): + csvdata = """domain,severity +example.org,silence +example2.org,suspend +""" + origin = "csvfile" + + parser = BlocklistParserMastodonCSV() + bl = parser.parse_blocklist(csvdata, origin) + + assert len(bl) == 2 + assert 'example.org' in bl + +def test_4_blocks(): + csvdata = """domain,severity,public_comment +example.org,silence,"test 1" +example2.org,suspend,"test 2" +example3.org,noop,"test 3" +example4.org,suspend,"test 4" +""" + origin = "csvfile" + + parser = BlocklistParserMastodonCSV() + bl = parser.parse_blocklist(csvdata, origin) + + assert len(bl) == 4 + assert 'example.org' in bl + assert 'example2.org' in bl + assert 'example3.org' in bl + assert 'example4.org' in bl + + assert bl['example.org'].severity.level == SeverityLevel.SILENCE + assert bl['example2.org'].severity.level == SeverityLevel.SUSPEND + assert bl['example3.org'].severity.level == SeverityLevel.NONE + assert bl['example4.org'].severity.level == SeverityLevel.SUSPEND + +def test_ignore_comments(): + csvdata = """domain,severity,public_comment,private_comment +example.org,silence,"test 1","ignore me" +example2.org,suspend,"test 2","ignote me also" +example3.org,noop,"test 3","and me" +example4.org,suspend,"test 4","also me" +""" + origin = "csvfile" + + parser = BlocklistParserMastodonCSV() + bl = parser.parse_blocklist(csvdata, origin) + + assert len(bl) == 4 + assert 'example.org' in bl + assert 'example2.org' in bl + assert 'example3.org' in bl + assert 'example4.org' in bl + + assert bl['example.org'].public_comment == '' + assert bl['example.org'].private_comment == '' + assert bl['example3.org'].public_comment == '' + assert bl['example4.org'].private_comment == '' \ No newline at end of file diff --git a/tests/test_parser_json.py 
b/tests/test_parser_json.py index 8bf17df..b2fb0a1 100644 --- a/tests/test_parser_json.py +++ b/tests/test_parser_json.py @@ -1,8 +1,8 @@ """Tests of the JSON parsing """ -from fediblockhole.blocklist_parser import BlocklistParserJSON, parse_blocklist -from fediblockhole.const import DomainBlock, BlockSeverity, SeverityLevel +from fediblockhole.blocklists import BlocklistParserJSON, parse_blocklist +from fediblockhole.const import SeverityLevel datafile = 'data-mastodon.json' @@ -14,33 +14,32 @@ def test_json_parser(): data = load_data() parser = BlocklistParserJSON() - bl = parser.parse_blocklist(data) + bl = parser.parse_blocklist(data, 'test_json') assert len(bl) == 10 - assert bl[0].domain == 'example.org' - assert bl[1].domain == 'example2.org' - assert bl[2].domain == 'example3.org' - assert bl[3].domain == 'example4.org' + assert 'example.org' in bl + assert 'example2.org' in bl + assert 'example3.org' in bl + assert 'example4.org' in bl - assert bl[0].severity.level == SeverityLevel.SUSPEND - assert bl[1].severity.level == SeverityLevel.SILENCE - assert bl[2].severity.level == SeverityLevel.SUSPEND - assert bl[3].severity.level == SeverityLevel.NONE + assert bl['example.org'].severity.level == SeverityLevel.SUSPEND + assert bl['example2.org'].severity.level == SeverityLevel.SILENCE + assert bl['example3.org'].severity.level == SeverityLevel.SUSPEND + assert bl['example4.org'].severity.level == SeverityLevel.NONE def test_ignore_comments(): data = load_data() parser = BlocklistParserJSON() - bl = parser.parse_blocklist(data) + bl = parser.parse_blocklist(data, 'test_json') assert len(bl) == 10 - assert bl[0].domain == 'example.org' - assert bl[1].domain == 'example2.org' - assert bl[2].domain == 'example3.org' - assert bl[3].domain == 'example4.org' + assert 'example.org' in bl + assert 'example2.org' in bl + assert 'example3.org' in bl + assert 'example4.org' in bl - assert bl[0].public_comment == '' - assert bl[0].private_comment == '' - - assert bl[2].public_comment == '' - assert bl[2].private_comment == '' \ No newline at end of file + assert bl['example.org'].public_comment == '' + assert bl['example.org'].private_comment == '' + assert bl['example3.org'].public_comment == '' + assert bl['example4.org'].private_comment == '' \ No newline at end of file diff --git a/tests/test_parser_rapidblockcsv.py b/tests/test_parser_rapidblockcsv.py index edb8d1e..65d579d 100644 --- a/tests/test_parser_rapidblockcsv.py +++ b/tests/test_parser_rapidblockcsv.py @@ -1,7 +1,7 @@ """Tests of the Rapidblock CSV parsing """ -from fediblockhole.blocklist_parser import RapidBlockParserCSV, parse_blocklist +from fediblockhole.blocklists import RapidBlockParserCSV, parse_blocklist from fediblockhole.const import DomainBlock, BlockSeverity, SeverityLevel csvdata = """example.org\r\nsubdomain.example.org\r\nanotherdomain.org\r\ndomain4.org\r\n""" @@ -11,13 +11,13 @@ def test_basic_rapidblock(): bl = parser.parse_blocklist(csvdata) assert len(bl) == 4 - assert bl[0].domain == 'example.org' - assert bl[1].domain == 'subdomain.example.org' - assert bl[2].domain == 'anotherdomain.org' - assert bl[3].domain == 'domain4.org' + assert 'example.org' in bl + assert 'subdomain.example.org' in bl + assert 'anotherdomain.org' in bl + assert 'domain4.org' in bl def test_severity_is_suspend(): bl = parser.parse_blocklist(csvdata) - for block in bl: + for block in bl.values(): assert block.severity.level == SeverityLevel.SUSPEND \ No newline at end of file diff --git a/tests/test_parser_rapidblockjson.py 
b/tests/test_parser_rapidblockjson.py index 8ccca0f..ad13811 100644 --- a/tests/test_parser_rapidblockjson.py +++ b/tests/test_parser_rapidblockjson.py @@ -1,6 +1,6 @@ """Test parsing the RapidBlock JSON format """ -from fediblockhole.blocklist_parser import parse_blocklist +from fediblockhole.blocklists import parse_blocklist from fediblockhole.const import SeverityLevel @@ -9,26 +9,26 @@ rapidblockjson = "data-rapidblock.json" def test_parse_rapidblock_json(): with open(rapidblockjson) as fp: data = fp.read() - bl = parse_blocklist(data, 'rapidblock.json') + bl = parse_blocklist(data, 'pytest', 'rapidblock.json') - assert bl[0].domain == '101010.pl' - assert bl[0].severity.level == SeverityLevel.SUSPEND - assert bl[0].public_comment == '' + assert '101010.pl' in bl + assert bl['101010.pl'].severity.level == SeverityLevel.SUSPEND + assert bl['101010.pl'].public_comment == '' - assert bl[10].domain == 'berserker.town' - assert bl[10].severity.level == SeverityLevel.SUSPEND - assert bl[10].public_comment == '' - assert bl[10].private_comment == '' + assert 'berserker.town' in bl + assert bl['berserker.town'].severity.level == SeverityLevel.SUSPEND + assert bl['berserker.town'].public_comment == '' + assert bl['berserker.town'].private_comment == '' def test_parse_with_comments(): with open(rapidblockjson) as fp: data = fp.read() - bl = parse_blocklist(data, 'rapidblock.json', ['domain', 'severity', 'public_comment', 'private_comment']) + bl = parse_blocklist(data, 'pytest', 'rapidblock.json', ['domain', 'severity', 'public_comment', 'private_comment']) - assert bl[0].domain == '101010.pl' - assert bl[0].severity.level == SeverityLevel.SUSPEND - assert bl[0].public_comment == 'cryptomining javascript, white supremacy' + assert '101010.pl' in bl + assert bl['101010.pl'].severity.level == SeverityLevel.SUSPEND + assert bl['101010.pl'].public_comment == 'cryptomining javascript, white supremacy' - assert bl[10].domain == 'berserker.town' - assert bl[10].severity.level == SeverityLevel.SUSPEND - assert bl[10].public_comment == 'freeze peach' \ No newline at end of file + assert 'berserker.town' in bl + assert bl['berserker.town'].severity.level == SeverityLevel.SUSPEND + assert bl['berserker.town'].public_comment == 'freeze peach' \ No newline at end of file