Merge pull request #46 from eigenmagic/mastodon_csv_dialect

Mastodon csv dialect
This commit is contained in:
Justin Warren 2023-02-16 09:16:32 +11:00 committed by GitHub
commit 5abaecb06e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 112 additions and 0 deletions

View File

@ -197,6 +197,7 @@ Supported formats are currently:
- Comma-Separated Values (CSV) - Comma-Separated Values (CSV)
- JSON - JSON
- Mastodon v4.1 flavoured CSV
- RapidBlock CSV - RapidBlock CSV
- RapidBlock JSON - RapidBlock JSON
@ -214,6 +215,17 @@ A CSV format blocklist must contain a header row with at least a `domain` and `s
Optional fields, as listed above, may also be included. Optional fields, as listed above, may also be included.
#### Mastodon v4.1 CSV format
As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
infinite wisdom, the Mastodon devs decided that field names should begin with a
`#` character in the header, unlike the field names in the JSON output via the
API… or in pretty much any other CSV file anywhere else.
Setting the format to `mastodon_csv` will strip off the `#` character when
parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
other CSV formatted blocklist.
#### JSON format #### JSON format
JSON is also supported. It uses the same format as the JSON returned from the Mastodon API. JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.

View File

@ -160,6 +160,24 @@ class BlocklistParserCSV(BlocklistParser):
block.severity = self.max_severity block.severity = self.max_severity
return block return block
class BlocklistParserMastodonCSV(BlocklistParserCSV):
    """Parser for the Mastodon flavour of CSV blocklists.

    The Mastodon v4.1.x domain block CSV export prefixes its header
    field names with a '#' character. This parser removes that prefix
    from each item's keys and then defers to the plain CSV parser.
    """
    do_preparse = True

    def parse_item(self, blockitem: dict) -> DomainBlock:
        """Parse one item after stripping leading '#'s from its keys.

        A new dict is built with the renamed keys, so the dict passed
        in is not modified here; the renamed copy is handed to the
        parent class's parse_item().
        """
        unhashed = {
            field.lstrip('#'): value
            for field, value in blockitem.items()
        }
        return super().parse_item(unhashed)
class RapidBlockParserCSV(BlocklistParserCSV): class RapidBlockParserCSV(BlocklistParserCSV):
""" Parse RapidBlock CSV blocklists """ Parse RapidBlock CSV blocklists
@ -223,6 +241,7 @@ def str2bool(boolstring: str) -> bool:
FORMAT_PARSERS = { FORMAT_PARSERS = {
'csv': BlocklistParserCSV, 'csv': BlocklistParserCSV,
'mastodon_csv': BlocklistParserMastodonCSV,
'json': BlocklistParserJSON, 'json': BlocklistParserJSON,
'mastodon_api_public': BlocklistParserMastodonAPIPublic, 'mastodon_api_public': BlocklistParserMastodonAPIPublic,
'rapidblock.csv': RapidBlockParserCSV, 'rapidblock.csv': RapidBlockParserCSV,

View File

@ -0,0 +1,81 @@
"""Tests of the CSV parsing
"""
from fediblockhole.blocklists import BlocklistParserMastodonCSV
from fediblockhole.const import SeverityLevel
def test_single_line():
    """Input consisting of a single line produces an empty blocklist."""
    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist("example.org", "csvfile")

    assert len(bl) == 0
def test_header_only():
    """A '#'-prefixed header row with no data rows produces no blocks."""
    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist("#domain,#severity,#public_comment", "csvfile")

    assert len(bl) == 0
def test_2_blocks():
    """Two data rows parse into two blocks for both header styles.

    The plain 'domain,severity' header is kept from the original test,
    and the '#'-prefixed header is added so the test actually exercises
    the prefix stripping that this parser exists for.
    """
    rows = """example.org,silence
example2.org,suspend
"""
    origin = "csvfile"

    for header in ("domain,severity", "#domain,#severity"):
        csvdata = header + "\n" + rows
        parser = BlocklistParserMastodonCSV()
        bl = parser.parse_blocklist(csvdata, origin)

        assert len(bl) == 2
        assert 'example.org' in bl
        # Check the second row too, not just the first one.
        assert 'example2.org' in bl
def test_4_blocks():
    """Four rows under a '#'-prefixed header parse with the right severities.

    The header uses the Mastodon export style ('#domain', ...) so that the
    key renaming done by BlocklistParserMastodonCSV is part of what is
    being tested.
    """
    csvdata = """#domain,#severity,#public_comment
example.org,silence,"test 1"
example2.org,suspend,"test 2"
example3.org,noop,"test 3"
example4.org,suspend,"test 4"
"""
    origin = "csvfile"

    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist(csvdata, origin)

    assert len(bl) == 4
    assert 'example.org' in bl
    assert 'example2.org' in bl
    assert 'example3.org' in bl
    assert 'example4.org' in bl

    assert bl['example.org'].severity.level == SeverityLevel.SILENCE
    assert bl['example2.org'].severity.level == SeverityLevel.SUSPEND
    assert bl['example3.org'].severity.level == SeverityLevel.NONE
    assert bl['example4.org'].severity.level == SeverityLevel.SUSPEND
def test_ignore_comments():
    """Comment columns are not imported by a default-constructed parser.

    The header uses the Mastodon '#'-prefixed field names, so this also
    checks that the comment fields are still dropped after the prefix
    has been stripped from the keys.
    """
    csvdata = """#domain,#severity,#public_comment,#private_comment
example.org,silence,"test 1","ignore me"
example2.org,suspend,"test 2","ignote me also"
example3.org,noop,"test 3","and me"
example4.org,suspend,"test 4","also me"
"""
    origin = "csvfile"

    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist(csvdata, origin)

    assert len(bl) == 4
    assert 'example.org' in bl
    assert 'example2.org' in bl
    assert 'example3.org' in bl
    assert 'example4.org' in bl

    # The comment text supplied in the CSV must not show up on the blocks.
    assert bl['example.org'].public_comment == ''
    assert bl['example.org'].private_comment == ''
    assert bl['example3.org'].public_comment == ''
    assert bl['example4.org'].private_comment == ''