Merge pull request #46 from eigenmagic/mastodon_csv_dialect
Mastodon csv dialect
This commit is contained in:
commit
5abaecb06e
12
README.md
12
README.md
|
@ -197,6 +197,7 @@ Supported formats are currently:
|
||||||
|
|
||||||
- Comma-Separated Values (CSV)
|
- Comma-Separated Values (CSV)
|
||||||
- JSON
|
- JSON
|
||||||
|
- Mastodon v4.1 flavoured CSV
|
||||||
- RapidBlock CSV
|
- RapidBlock CSV
|
||||||
- RapidBlock JSON
|
- RapidBlock JSON
|
||||||
|
|
||||||
|
@ -214,6 +215,17 @@ A CSV format blocklist must contain a header row with at least a `domain` and `s
|
||||||
|
|
||||||
Optional fields, as listed above, may also be included.
|
Optional fields, as listed above, may also be included.
|
||||||
|
|
||||||
|
#### Mastodon v4.1 CSV format
|
||||||
|
|
||||||
|
As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
|
||||||
|
infinite wisdom, the Mastodon devs decided that field names should begin with a
|
||||||
|
`#` character in the header, unlike the field names in the JSON output via the
|
||||||
|
API… or in pretty much any other CSV file anywhere else.
|
||||||
|
|
||||||
|
Setting the format to `mastodon_csv` will strip off the `#` character when
|
||||||
|
parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
|
||||||
|
other CSV formatted blocklist.
|
||||||
|
|
||||||
#### JSON format
|
#### JSON format
|
||||||
|
|
||||||
JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.
|
JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.
|
||||||
|
|
|
@ -160,6 +160,24 @@ class BlocklistParserCSV(BlocklistParser):
|
||||||
block.severity = self.max_severity
|
block.severity = self.max_severity
|
||||||
return block
|
return block
|
||||||
|
|
||||||
|
class BlocklistParserMastodonCSV(BlocklistParserCSV):
    """ Parse blocklists written in the Mastodon v4.1.x CSV export dialect

    That export prefixes each header field name with a '#' character,
    so the keys of every item are re-written without it before the
    item is handed to BlocklistParserCSV.parse_item().
    """
    do_preparse = True

    def parse_item(self, blockitem: dict) -> DomainBlock:
        """Re-key `blockitem` without leading '#' characters and parse it

        @param blockitem: A dict of one blocklist row, keyed by field name.
        @returns: Whatever the parent class parse_item() returns for the
            re-keyed dict.
        """
        # NOTE(review): assumes every key is a str. If rows come from
        # csv.DictReader, surplus columns are keyed by None and lstrip()
        # would fail -- confirm against the preparse implementation.
        unprefixed = {
            field.lstrip('#'): value
            for field, value in blockitem.items()
        }
        return super().parse_item(unprefixed)
|
||||||
|
|
||||||
class RapidBlockParserCSV(BlocklistParserCSV):
|
class RapidBlockParserCSV(BlocklistParserCSV):
|
||||||
""" Parse RapidBlock CSV blocklists
|
""" Parse RapidBlock CSV blocklists
|
||||||
|
|
||||||
|
@ -223,6 +241,7 @@ def str2bool(boolstring: str) -> bool:
|
||||||
|
|
||||||
FORMAT_PARSERS = {
|
FORMAT_PARSERS = {
|
||||||
'csv': BlocklistParserCSV,
|
'csv': BlocklistParserCSV,
|
||||||
|
'mastodon_csv': BlocklistParserMastodonCSV,
|
||||||
'json': BlocklistParserJSON,
|
'json': BlocklistParserJSON,
|
||||||
'mastodon_api_public': BlocklistParserMastodonAPIPublic,
|
'mastodon_api_public': BlocklistParserMastodonAPIPublic,
|
||||||
'rapidblock.csv': RapidBlockParserCSV,
|
'rapidblock.csv': RapidBlockParserCSV,
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
"""Tests of the CSV parsing
|
||||||
|
"""
|
||||||
|
|
||||||
|
from fediblockhole.blocklists import BlocklistParserMastodonCSV
|
||||||
|
from fediblockhole.const import SeverityLevel
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_line():
    """A CSV consisting of one lone line parses to an empty blocklist."""
    mastodon_parser = BlocklistParserMastodonCSV()
    blocklist = mastodon_parser.parse_blocklist("example.org", "csvfile")

    assert len(blocklist) == 0
|
||||||
|
|
||||||
|
def test_header_only():
    """A '#'-prefixed header row with no data rows yields no blocks."""
    header_row = "#domain,#severity,#public_comment"

    mastodon_parser = BlocklistParserMastodonCSV()
    blocklist = mastodon_parser.parse_blocklist(header_row, "csvfile")

    assert len(blocklist) == 0
|
||||||
|
|
||||||
|
def test_2_blocks():
    """Two data rows under un-prefixed headers produce two blocks.

    Header names without a '#' pass through the key stripping unchanged,
    so a plain CSV header is used here.
    """
    rows = """domain,severity
example.org,silence
example2.org,suspend
"""
    source_name = "csvfile"

    blocklist = BlocklistParserMastodonCSV().parse_blocklist(rows, source_name)

    assert len(blocklist) == 2
    assert 'example.org' in blocklist
|
||||||
|
|
||||||
|
def test_4_blocks():
    """Four rows under Mastodon-style headers parse with their severities.

    The header uses the '#'-prefixed field names of the Mastodon v4.1
    export, so the prefix stripping is exercised on rows that actually
    produce blocks rather than only on a header-only file.
    """
    csvdata = """#domain,#severity,#public_comment
example.org,silence,"test 1"
example2.org,suspend,"test 2"
example3.org,noop,"test 3"
example4.org,suspend,"test 4"
"""
    origin = "csvfile"

    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist(csvdata, origin)

    # Expected severity level for each domain in the CSV above.
    expected_levels = {
        'example.org': SeverityLevel.SILENCE,
        'example2.org': SeverityLevel.SUSPEND,
        'example3.org': SeverityLevel.NONE,
        'example4.org': SeverityLevel.SUSPEND,
    }

    assert len(bl) == 4
    for domain in expected_levels:
        assert domain in bl

    for domain, level in expected_levels.items():
        assert bl[domain].severity.level == level
|
||||||
|
|
||||||
|
def test_ignore_comments():
    """Comment columns in the CSV are not carried onto the parsed blocks.

    The header uses the '#'-prefixed field names of the Mastodon v4.1
    export so that the prefix stripping is exercised together with the
    comment handling.
    """
    # NOTE(review): presumably the comments are dropped because the
    # parser's default import fields exclude them -- confirm against
    # the BlocklistParser defaults.
    csvdata = """#domain,#severity,#public_comment,#private_comment
example.org,silence,"test 1","ignore me"
example2.org,suspend,"test 2","ignote me also"
example3.org,noop,"test 3","and me"
example4.org,suspend,"test 4","also me"
"""
    origin = "csvfile"

    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist(csvdata, origin)

    assert len(bl) == 4
    for domain in ('example.org', 'example2.org', 'example3.org', 'example4.org'):
        assert domain in bl

    assert bl['example.org'].public_comment == ''
    assert bl['example.org'].private_comment == ''
    assert bl['example3.org'].public_comment == ''
    assert bl['example4.org'].private_comment == ''
|
Loading…
Reference in New Issue