Merge pull request #46 from eigenmagic/mastodon_csv_dialect

Mastodon csv dialect
This commit is contained in:
Justin Warren 2023-02-16 09:16:32 +11:00 committed by GitHub
commit 5abaecb06e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 112 additions and 0 deletions

View File

@ -197,6 +197,7 @@ Supported formats are currently:
- Comma-Separated Values (CSV)
- JSON
- Mastodon v4.1 flavoured CSV
- RapidBlock CSV
- RapidBlock JSON
@ -214,6 +215,17 @@ A CSV format blocklist must contain a header row with at least a `domain` and `s
Optional fields, as listed above, may also be included.
#### Mastodon v4.1 CSV format
As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
infinite wisdom, the Mastodon devs decided that field names should begin with a
`#` character in the header, unlike the field names in the JSON output via the
API… or in pretty much any other CSV file anywhere else.
Setting the format to `mastodon_csv` will strip off the `#` character when
parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
other CSV formatted blocklist.
#### JSON format
JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.

View File

@ -160,6 +160,24 @@ class BlocklistParserCSV(BlocklistParser):
block.severity = self.max_severity
return block
class BlocklistParserMastodonCSV(BlocklistParserCSV):
    """Parse Mastodon CSV formatted blocklists.

    The Mastodon v4.1.x domain block CSV export prefixes its field
    names with a '#' character (e.g. ``#domain``). This parser strips
    any leading '#' characters from each field name and then hands the
    renamed row to the parent CSV parser's ``parse_item``.
    """
    do_preparse = True

    def parse_item(self, blockitem: dict) -> DomainBlock:
        """Build a new blockitem dict with un-#ed keys and parse it.

        Args:
            blockitem: One blocklist row, keyed by header field name.

        Returns:
            The result of the parent class's ``parse_item`` for the
            row with its keys renamed.
        """
        # Only string keys can carry a '#' prefix. Anything else is passed
        # through unchanged rather than crashing on a missing lstrip();
        # csv.DictReader, for instance, files surplus columns under None.
        # NOTE(review): assumes rows may originate from csv.DictReader;
        # confirm against the parent class's pre-parse step.
        newdict = {
            (key.lstrip('#') if isinstance(key, str) else key): value
            for key, value in blockitem.items()
        }
        return super().parse_item(newdict)
class RapidBlockParserCSV(BlocklistParserCSV):
""" Parse RapidBlock CSV blocklists
@ -223,6 +241,7 @@ def str2bool(boolstring: str) -> bool:
FORMAT_PARSERS = {
'csv': BlocklistParserCSV,
'mastodon_csv': BlocklistParserMastodonCSV,
'json': BlocklistParserJSON,
'mastodon_api_public': BlocklistParserMastodonAPIPublic,
'rapidblock.csv': RapidBlockParserCSV,

View File

@ -0,0 +1,81 @@
"""Tests of the CSV parsing
"""
from fediblockhole.blocklists import BlocklistParserMastodonCSV
from fediblockhole.const import SeverityLevel
def test_single_line():
    """A lone line with no data rows beneath it produces an empty blocklist."""
    blocklist = BlocklistParserMastodonCSV().parse_blocklist(
        "example.org",
        "csvfile",
    )

    assert len(blocklist) == 0
def test_header_only():
    """A '#'-prefixed header row with no data rows produces an empty blocklist."""
    header = "#domain,#severity,#public_comment"
    source_name = "csvfile"

    blocklist = BlocklistParserMastodonCSV().parse_blocklist(header, source_name)

    assert len(blocklist) == 0
def test_2_blocks():
    """Two rows under a Mastodon-style '#'-prefixed header are both parsed.

    The header uses the '#domain' / '#severity' field names so that the
    parser's '#'-stripping is actually exercised by this test.
    """
    csvdata = """#domain,#severity
example.org,silence
example2.org,suspend
"""
    origin = "csvfile"

    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist(csvdata, origin)

    assert len(bl) == 2
    assert 'example.org' in bl
    assert 'example2.org' in bl
def test_4_blocks():
    """Four rows under a '#'-prefixed header parse with the expected severities.

    The header uses Mastodon-style '#'-prefixed field names so that the
    parser's '#'-stripping is actually exercised by this test.
    """
    csvdata = """#domain,#severity,#public_comment
example.org,silence,"test 1"
example2.org,suspend,"test 2"
example3.org,noop,"test 3"
example4.org,suspend,"test 4"
"""
    origin = "csvfile"

    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist(csvdata, origin)

    assert len(bl) == 4
    assert 'example.org' in bl
    assert 'example2.org' in bl
    assert 'example3.org' in bl
    assert 'example4.org' in bl

    assert bl['example.org'].severity.level == SeverityLevel.SILENCE
    assert bl['example2.org'].severity.level == SeverityLevel.SUSPEND
    assert bl['example3.org'].severity.level == SeverityLevel.NONE
    assert bl['example4.org'].severity.level == SeverityLevel.SUSPEND
def test_ignore_comments():
    """Comment columns present in the data come back empty on the parsed blocks.

    The header here has no '#' prefixes; all four domains must still be
    present in the result.
    """
    rows = (
        'domain,severity,public_comment,private_comment\n'
        'example.org,silence,"test 1","ignore me"\n'
        'example2.org,suspend,"test 2","ignote me also"\n'
        'example3.org,noop,"test 3","and me"\n'
        'example4.org,suspend,"test 4","also me"\n'
    )

    blocklist = BlocklistParserMastodonCSV().parse_blocklist(rows, "csvfile")

    assert len(blocklist) == 4
    for domain in ('example.org', 'example2.org', 'example3.org', 'example4.org'):
        assert domain in blocklist

    # Spot-check both comment fields across several entries.
    first = blocklist['example.org']
    assert first.public_comment == ''
    assert first.private_comment == ''
    assert blocklist['example3.org'].public_comment == ''
    assert blocklist['example4.org'].private_comment == ''