From b67ff0c47174b66996e19ab4ba335d67fa49fac7 Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Sun, 12 Feb 2023 17:53:26 +1100 Subject: [PATCH] Merging domain above threshold needs to be in the threshold check block. Added debugging statements for threshold merging. --- src/fediblockhole/__init__.py | 8 +- tests/test_merge_thresholds.py | 153 +++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 2 deletions(-) create mode 100644 tests/test_merge_thresholds.py diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index f1bc354..465b08d 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -222,17 +222,21 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max', if threshold_type == 'count': domain_threshold_level = len(domain_blocks[domain]) elif threshold_type == 'pct': - domain_threshold_level = len(domain_blocks[domain]) / num_blocklists + domain_threshold_level = len(domain_blocks[domain]) / num_blocklists * 100 + # log.debug(f"domain threshold level: {domain_threshold_level}") else: raise ValueError(f"Unsupported threshold type '{threshold_type}'. Supported values are: 'count', 'pct'") + log.debug(f"Checking if {domain_threshold_level} >= {threshold} for {domain}") if domain_threshold_level >= threshold: # Add first block in the list to merged block = domain_blocks[domain][0] + log.debug(f"Yes. 
Merging block: {block}") + # Merge the others with this record for newblock in domain_blocks[domain][1:]: block = apply_mergeplan(block, newblock, mergeplan) - merged.blocks[block.domain] = block + merged.blocks[block.domain] = block return merged diff --git a/tests/test_merge_thresholds.py b/tests/test_merge_thresholds.py new file mode 100644 index 0000000..4cde03e --- /dev/null +++ b/tests/test_merge_thresholds.py @@ -0,0 +1,153 @@ +"""Test merge with thresholds +""" + +from fediblockhole.blocklists import Blocklist, parse_blocklist +from fediblockhole import merge_blocklists, apply_mergeplan + +from fediblockhole.const import SeverityLevel, DomainBlock + +datafile01 = "data-suspends-01.csv" +datafile02 = "data-silences-01.csv" +datafile03 = "data-noop-01.csv" + +import_fields = [ + 'domain', + 'severity', + 'public_comment', + 'private_comment', + 'reject_media', + 'reject_reports', + 'obfuscate' +] + +def load_test_blocklist_data(datafiles): + + blocklists = [] + + for df in datafiles: + with open(df) as fp: + data = fp.read() + bl = parse_blocklist(data, df, 'csv', import_fields) + blocklists.append(bl) + + return blocklists + +def test_mergeplan_count_2(): + """Only merge a block if present in 2 or more lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 
'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3], 'max', threshold=2) + + assert 'onemention.example.org' not in ml + assert 'twomention.example.org' in ml + assert 'threemention.example.org' in ml + +def test_mergeplan_count_3(): + """Only merge a block if present in 3 or more lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3], 'max', threshold=3) + + assert 'onemention.example.org' not in ml + assert 'twomention.example.org' not in ml + assert 'threemention.example.org' in ml + +def test_mergeplan_pct_30(): + """Only merge a block if present in 30% or more of the lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 
'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_4 = Blocklist('test4', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3, bl_4], 'max', threshold=30, threshold_type='pct') + + assert 'onemention.example.org' not in ml + assert 'twomention.example.org' in ml + assert 'threemention.example.org' in ml + assert 'fourmention.example.org' in ml + +def test_mergeplan_pct_55(): + """Only merge a block if present in 55% or more of the lists + """ + + bl_1 = Blocklist('test01', { + 'onemention.example.org': DomainBlock('onemention.example.org', 'suspend', '', '', True, True, True), + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + + }) + + bl_2 = Blocklist('test2', { + 'twomention.example.org': DomainBlock('twomention.example.org', 'suspend', '', '', True, True, True), + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_3 = Blocklist('test3', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': 
DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + bl_4 = Blocklist('test4', { + 'threemention.example.org': DomainBlock('threemention.example.org', 'suspend', '', '', True, True, True), + 'fourmention.example.org': DomainBlock('fourmention.example.org', 'suspend', '', '', True, True, True), + }) + + ml = merge_blocklists([bl_1, bl_2, bl_3, bl_4], 'max', threshold=55, threshold_type='pct') + + assert 'onemention.example.org' not in ml + assert 'twomention.example.org' not in ml + assert 'threemention.example.org' in ml + assert 'fourmention.example.org' in ml \ No newline at end of file