Added parser for Mastodon 4.1 blocklist CSV format

This commit is contained in:
Justin Warren 2023-02-13 10:22:33 +11:00
parent 68c04fa5ce
commit 9f95f143df
No known key found for this signature in database
2 changed files with 100 additions and 0 deletions

View File

@ -160,6 +160,24 @@ class BlocklistParserCSV(BlocklistParser):
block.severity = self.max_severity block.severity = self.max_severity
return block return block
class BlocklistParserMastodonCSV(BlocklistParserCSV):
    """Parser for blocklists in the Mastodon CSV export format.

    The domain block CSV export of Mastodon v4.1.x puts a '#' character
    in front of its field names. This parser removes that prefix from
    every field name and then defers to BlocklistParserCSV.
    """
    # NOTE(review): this flag is not read anywhere in this class;
    # presumably a parent class consults it -- confirm.
    do_preparse = True

    def parse_item(self, blockitem: dict) -> DomainBlock:
        """Parse one row after removing leading '#'s from its keys.

        A new dict is built, so the caller's blockitem is not modified
        here; the values are passed through unchanged.
        """
        renamed = {key.lstrip('#'): blockitem[key] for key in blockitem}
        return super().parse_item(renamed)
class RapidBlockParserCSV(BlocklistParserCSV): class RapidBlockParserCSV(BlocklistParserCSV):
""" Parse RapidBlock CSV blocklists """ Parse RapidBlock CSV blocklists
@ -223,6 +241,7 @@ def str2bool(boolstring: str) -> bool:
FORMAT_PARSERS = { FORMAT_PARSERS = {
'csv': BlocklistParserCSV, 'csv': BlocklistParserCSV,
'mastodon_csv': BlocklistParserMastodonCSV,
'json': BlocklistParserJSON, 'json': BlocklistParserJSON,
'mastodon_api_public': BlocklistParserMastodonAPIPublic, 'mastodon_api_public': BlocklistParserMastodonAPIPublic,
'rapidblock.csv': RapidBlockParserCSV, 'rapidblock.csv': RapidBlockParserCSV,

View File

@ -0,0 +1,81 @@
"""Tests of the CSV parsing
"""
from fediblockhole.blocklists import BlocklistParserMastodonCSV
from fediblockhole.const import SeverityLevel
def test_single_line():
    """Input made of one bare domain line yields an empty blocklist."""
    source_name = "csvfile"
    raw_csv = "example.org"

    blocklist = BlocklistParserMastodonCSV().parse_blocklist(raw_csv, source_name)

    assert len(blocklist) == 0
def test_header_only():
    """A '#'-prefixed header row with no data rows yields no blocks."""
    source_name = "csvfile"
    raw_csv = "#domain,#severity,#public_comment"

    blocklist = BlocklistParserMastodonCSV().parse_blocklist(raw_csv, source_name)

    assert len(blocklist) == 0
def test_2_blocks():
    """Two data rows give two blocks, with bare or '#'-prefixed headers.

    Mastodon v4.1.x exports prefix the field names with '#', so the
    prefixed header is the case this parser exists for. The bare header
    is the input this test originally used and is kept so un-prefixed
    files stay covered.
    """
    rows = (
        "example.org,silence\n"
        "example2.org,suspend\n"
    )
    origin = "csvfile"

    for header in ("domain,severity", "#domain,#severity"):
        csvdata = header + "\n" + rows
        parser = BlocklistParserMastodonCSV()
        bl = parser.parse_blocklist(csvdata, origin)

        # The header is attached as the assertion message so a failure
        # shows which of the two variants broke.
        assert len(bl) == 2, header
        assert 'example.org' in bl, header
def test_4_blocks():
    """Four rows parse to four blocks carrying the expected severities.

    The same rows are parsed under the bare header (the original input
    of this test) and under the '#'-prefixed header that Mastodon
    v4.1.x exports use, so the key renaming in the parser is exercised
    on real data rows.
    """
    rows = (
        'example.org,silence,"test 1"\n'
        'example2.org,suspend,"test 2"\n'
        'example3.org,noop,"test 3"\n'
        'example4.org,suspend,"test 4"\n'
    )
    origin = "csvfile"

    # The 'noop' severity string is expected to map to SeverityLevel.NONE.
    expected_levels = {
        'example.org': SeverityLevel.SILENCE,
        'example2.org': SeverityLevel.SUSPEND,
        'example3.org': SeverityLevel.NONE,
        'example4.org': SeverityLevel.SUSPEND,
    }

    headers = (
        "domain,severity,public_comment",
        "#domain,#severity,#public_comment",
    )
    for header in headers:
        csvdata = header + "\n" + rows
        parser = BlocklistParserMastodonCSV()
        bl = parser.parse_blocklist(csvdata, origin)

        assert len(bl) == 4, header
        for domain in expected_levels:
            assert domain in bl, (header, domain)
        for domain, level in expected_levels.items():
            assert bl[domain].severity.level == level, (header, domain)
def test_ignore_comments():
    """Comment columns in the CSV come back empty on the parsed blocks.

    A parser built with no arguments is expected to leave both
    public_comment and private_comment as '' even though the input
    carries text in those columns. The rows are parsed under the bare
    header (the original input of this test) and under the '#'-prefixed
    header used by Mastodon v4.1.x exports.
    """
    rows = (
        'example.org,silence,"test 1","ignore me"\n'
        'example2.org,suspend,"test 2","ignote me also"\n'
        'example3.org,noop,"test 3","and me"\n'
        'example4.org,suspend,"test 4","also me"\n'
    )
    origin = "csvfile"

    headers = (
        "domain,severity,public_comment,private_comment",
        "#domain,#severity,#public_comment,#private_comment",
    )
    for header in headers:
        csvdata = header + "\n" + rows
        parser = BlocklistParserMastodonCSV()
        bl = parser.parse_blocklist(csvdata, origin)

        assert len(bl) == 4, header
        assert 'example.org' in bl, header
        assert 'example2.org' in bl, header
        assert 'example3.org' in bl, header
        assert 'example4.org' in bl, header

        assert bl['example.org'].public_comment == '', header
        assert bl['example.org'].private_comment == '', header
        assert bl['example3.org'].public_comment == '', header
        assert bl['example4.org'].private_comment == '', header