Merging domain above threshold needs to be in the threshold check block.
Added debugging statements for threshold merging.
This commit is contained in:
parent
1e377099f0
commit
b67ff0c471
|
@ -222,13 +222,17 @@ def merge_blocklists(blocklists: list[Blocklist], mergeplan: str='max',
|
|||
if threshold_type == 'count':
|
||||
domain_threshold_level = len(domain_blocks[domain])
|
||||
elif threshold_type == 'pct':
|
||||
domain_threshold_level = len(domain_blocks[domain]) / num_blocklists
|
||||
domain_threshold_level = len(domain_blocks[domain]) / num_blocklists * 100
|
||||
# log.debug(f"domain threshold level: {domain_threshold_level}")
|
||||
else:
|
||||
raise ValueError(f"Unsupported threshold type '{threshold_type}'. Supported values are: 'count', 'pct'")
|
||||
|
||||
log.debug(f"Checking if {domain_threshold_level} >= {threshold} for {domain}")
|
||||
if domain_threshold_level >= threshold:
|
||||
# Add first block in the list to merged
|
||||
block = domain_blocks[domain][0]
|
||||
log.debug(f"Yes. Merging block: {block}")
|
||||
|
||||
# Merge the others with this record
|
||||
for newblock in domain_blocks[domain][1:]:
|
||||
block = apply_mergeplan(block, newblock, mergeplan)
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
"""Test merge with thresholds
|
||||
"""
|
||||
|
||||
from fediblockhole.blocklists import Blocklist, parse_blocklist
|
||||
from fediblockhole import merge_blocklists, apply_mergeplan
|
||||
|
||||
from fediblockhole.const import SeverityLevel, DomainBlock
|
||||
|
||||
# Names of CSV fixture files (not referenced by the tests visible here).
datafile01 = "data-suspends-01.csv"
datafile02 = "data-silences-01.csv"
datafile03 = "data-noop-01.csv"

# Field list handed to parse_blocklist() by load_test_blocklist_data().
import_fields = [
    'domain', 'severity',
    'public_comment', 'private_comment',
    'reject_media', 'reject_reports', 'obfuscate',
]
|
||||
|
||||
def load_test_blocklist_data(datafiles):
    """Read each file in *datafiles* and parse it as a CSV blocklist.

    Every file's full text is passed to ``parse_blocklist`` together with
    the file name, the format ``'csv'`` and the module-level
    ``import_fields``. The parsed results are returned as a list in the
    same order as *datafiles*.
    """
    parsed = []
    for path in datafiles:
        with open(path) as handle:
            contents = handle.read()
            # The path is passed as the second argument, as in the original call.
            parsed.append(parse_blocklist(contents, path, 'csv', import_fields))
    return parsed
|
||||
|
||||
def test_mergeplan_count_2():
    """Only merge a block if present in 2 or more lists.

    ``threshold_type`` is left at its default; the assertions expect the
    threshold to be applied as a count of lists mentioning the domain.
    """

    def suspend(domain):
        # Every fixture entry is an identical suspend block; only the
        # domain differs between entries.
        return DomainBlock(domain, 'suspend', '', '', True, True, True)

    bl_1 = Blocklist('test01', {
        'onemention.example.org': suspend('onemention.example.org'),
        'twomention.example.org': suspend('twomention.example.org'),
        'threemention.example.org': suspend('threemention.example.org'),
    })

    bl_2 = Blocklist('test2', {
        'twomention.example.org': suspend('twomention.example.org'),
        'threemention.example.org': suspend('threemention.example.org'),
    })

    # The original literal repeated this key twice; a dict keeps only the
    # last value for a repeated key, so a single entry is equivalent.
    bl_3 = Blocklist('test3', {
        'threemention.example.org': suspend('threemention.example.org'),
    })

    ml = merge_blocklists([bl_1, bl_2, bl_3], 'max', threshold=2)

    assert 'onemention.example.org' not in ml
    assert 'twomention.example.org' in ml
    assert 'threemention.example.org' in ml
|
||||
|
||||
def test_mergeplan_count_3():
    """Only merge a block if present in 3 or more lists.

    ``threshold_type`` is left at its default; the assertions expect the
    threshold to be applied as a count of lists mentioning the domain.
    """

    def suspend(domain):
        # Every fixture entry is an identical suspend block; only the
        # domain differs between entries.
        return DomainBlock(domain, 'suspend', '', '', True, True, True)

    bl_1 = Blocklist('test01', {
        'onemention.example.org': suspend('onemention.example.org'),
        'twomention.example.org': suspend('twomention.example.org'),
        'threemention.example.org': suspend('threemention.example.org'),
    })

    bl_2 = Blocklist('test2', {
        'twomention.example.org': suspend('twomention.example.org'),
        'threemention.example.org': suspend('threemention.example.org'),
    })

    # The original literal repeated this key twice; a dict keeps only the
    # last value for a repeated key, so a single entry is equivalent.
    bl_3 = Blocklist('test3', {
        'threemention.example.org': suspend('threemention.example.org'),
    })

    ml = merge_blocklists([bl_1, bl_2, bl_3], 'max', threshold=3)

    assert 'onemention.example.org' not in ml
    assert 'twomention.example.org' not in ml
    assert 'threemention.example.org' in ml
|
||||
|
||||
def test_mergeplan_pct_30():
    """Only merge a block if present in at least 30% of the lists.

    Four lists are merged with ``threshold_type='pct'``. The assertions
    expect a domain in one list (25%) to be dropped and domains in two,
    three or four lists (50%, 75%, 100%) to be kept.
    """

    def suspend(domain):
        # Every fixture entry is an identical suspend block; only the
        # domain differs between entries.
        return DomainBlock(domain, 'suspend', '', '', True, True, True)

    bl_1 = Blocklist('test01', {
        'onemention.example.org': suspend('onemention.example.org'),
        'twomention.example.org': suspend('twomention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    bl_2 = Blocklist('test2', {
        'twomention.example.org': suspend('twomention.example.org'),
        'threemention.example.org': suspend('threemention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    bl_3 = Blocklist('test3', {
        'threemention.example.org': suspend('threemention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    bl_4 = Blocklist('test4', {
        'threemention.example.org': suspend('threemention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    ml = merge_blocklists([bl_1, bl_2, bl_3, bl_4], 'max', threshold=30, threshold_type='pct')

    assert 'onemention.example.org' not in ml
    assert 'twomention.example.org' in ml
    assert 'threemention.example.org' in ml
    assert 'fourmention.example.org' in ml
|
||||
|
||||
def test_mergeplan_pct_55():
    """Only merge a block if present in at least 55% of the lists.

    Four lists are merged with ``threshold_type='pct'``. The assertions
    expect domains in one or two lists (25%, 50%) to be dropped and
    domains in three or four lists (75%, 100%) to be kept.
    """

    def suspend(domain):
        # Every fixture entry is an identical suspend block; only the
        # domain differs between entries.
        return DomainBlock(domain, 'suspend', '', '', True, True, True)

    bl_1 = Blocklist('test01', {
        'onemention.example.org': suspend('onemention.example.org'),
        'twomention.example.org': suspend('twomention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    bl_2 = Blocklist('test2', {
        'twomention.example.org': suspend('twomention.example.org'),
        'threemention.example.org': suspend('threemention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    bl_3 = Blocklist('test3', {
        'threemention.example.org': suspend('threemention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    bl_4 = Blocklist('test4', {
        'threemention.example.org': suspend('threemention.example.org'),
        'fourmention.example.org': suspend('fourmention.example.org'),
    })

    ml = merge_blocklists([bl_1, bl_2, bl_3, bl_4], 'max', threshold=55, threshold_type='pct')

    assert 'onemention.example.org' not in ml
    assert 'twomention.example.org' not in ml
    assert 'threemention.example.org' in ml
    assert 'fourmention.example.org' in ml
|
Loading…
Reference in New Issue