Merge pull request #46 from eigenmagic/mastodon_csv_dialect

Mastodon csv dialect
2023-02-16 09:16:32 +11:00 · 2023-02-16 09:16:32 +11:00 · 5abaecb06e
parent 68c04fa5ce 77e7921e63
commit 5abaecb06e
3 changed files with 112 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -197,6 +197,7 @@ Supported formats are currently:
 - Comma-Separated Values (CSV)
 - JSON
 - Mastodon v4.1 flavoured CSV
 - RapidBlock CSV
 - RapidBlock JSON
@ -214,6 +215,17 @@ A CSV format blocklist must contain a header row with at least a `domain` and `s
 Optional fields, as listed above, may also be included.
 #### Mastodon v4.1 CSV format
 As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
 infinite wisdom, the Mastodon devs decided that field names should begin with a
 `#` character in the header, unlike the field names in the JSON output via the
 API… or in pretty much any other CSV file anywhere else.
 Setting the format to `mastodon_csv` will strip off the `#` character when
 parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
 other CSV formatted blocklist.
 #### JSON format
 JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.
--- a/src/fediblockhole/blocklists.py
+++ b/src/fediblockhole/blocklists.py
@ -160,6 +160,24 @@ class BlocklistParserCSV(BlocklistParser):
            block.severity = self.max_severity
        return block
 class BlocklistParserMastodonCSV(BlocklistParserCSV):
    """Parse Mastodon CSV formatted blocklists.

    The Mastodon v4.1.x domain block CSV export prefixes its
    field names with a '#' character (e.g. ``#domain``). This parser
    strips that prefix from every field name and then hands the item
    to BlocklistParserCSV, so the rest of the parsing is shared.
    """
    do_preparse = True

    def parse_item(self, blockitem: dict) -> DomainBlock:
        """Parse one CSV row after removing '#' from its field names.

        A new dict is built so the caller's ``blockitem`` is not mutated.
        All leading '#' characters are removed from each string key;
        values are passed through unchanged.

        Non-string keys are kept as they are instead of raising
        AttributeError on ``.lstrip``. (csv.DictReader, for example, files
        surplus columns of an over-long row under the key ``None`` --
        presumably that is what feeds this parser; TODO confirm.)

        :param blockitem: Mapping of (possibly '#'-prefixed) field name
            to field value for a single blocklist row.
        :returns: Whatever BlocklistParserCSV.parse_item() returns for the
            un-prefixed item.
        """
        unprefixed = {
            (key.lstrip('#') if isinstance(key, str) else key): value
            for key, value in blockitem.items()
        }
        return super().parse_item(unprefixed)
 class RapidBlockParserCSV(BlocklistParserCSV):
    """ Parse RapidBlock CSV blocklists
@ -223,6 +241,7 @@ def str2bool(boolstring: str) -> bool:
 FORMAT_PARSERS = {
    'csv': BlocklistParserCSV,
    'mastodon_csv': BlocklistParserMastodonCSV,
    'json': BlocklistParserJSON,
    'mastodon_api_public': BlocklistParserMastodonAPIPublic,
    'rapidblock.csv': RapidBlockParserCSV,
--- a/tests/test_parser_csv_mastodon.py
+++ b/tests/test_parser_csv_mastodon.py
@ -0,0 +1,81 @@
 """Tests of the Mastodon-flavoured CSV parsing
 """
 from fediblockhole.blocklists import BlocklistParserMastodonCSV
 from fediblockhole.const import SeverityLevel
 def test_single_line():
    """A single line of CSV text with no rows under it gives an empty blocklist."""
    blocklist = BlocklistParserMastodonCSV().parse_blocklist(
        "example.org",
        "csvfile",
    )
    assert len(blocklist) == 0
 def test_header_only():
    """A Mastodon-style '#' header row with no data rows gives an empty blocklist."""
    header_row = "#domain,#severity,#public_comment"
    blocklist = BlocklistParserMastodonCSV().parse_blocklist(
        header_row,
        "csvfile",
    )
    assert len(blocklist) == 0
 def test_2_blocks():
    """Two data rows under a Mastodon-style '#' header parse into two blocks.

    The header uses the '#'-prefixed field names of the Mastodon v4.1
    export so that the prefix stripping is exercised against real data
    rows, not only against an empty file.
    """
    csvdata = """#domain,#severity
 example.org,silence
 example2.org,suspend
 """
    origin = "csvfile"
    parser = BlocklistParserMastodonCSV()
    bl = parser.parse_blocklist(csvdata, origin)
    assert len(bl) == 2
    assert 'example.org' in bl
 def test_4_blocks():
    """Four rows parse into four blocks with the expected severity levels."""
    csvtext = """domain,severity,public_comment
 example.org,silence,"test 1"
 example2.org,suspend,"test 2"
 example3.org,noop,"test 3"
 example4.org,suspend,"test 4"
 """
    blocklist = BlocklistParserMastodonCSV().parse_blocklist(csvtext, "csvfile")

    # Domain -> severity level each row is expected to produce.
    expected_levels = {
        'example.org': SeverityLevel.SILENCE,
        'example2.org': SeverityLevel.SUSPEND,
        'example3.org': SeverityLevel.NONE,
        'example4.org': SeverityLevel.SUSPEND,
    }

    assert len(blocklist) == 4
    # Check presence of every domain first, then the parsed severities.
    for domain in expected_levels:
        assert domain in blocklist
    for domain, level in expected_levels.items():
        assert blocklist[domain].severity.level == level
 def test_ignore_comments():
    """Comment columns in the CSV end up as empty strings on the parsed blocks."""
    csvtext = """domain,severity,public_comment,private_comment
 example.org,silence,"test 1","ignore me"
 example2.org,suspend,"test 2","ignote me also"
 example3.org,noop,"test 3","and me"
 example4.org,suspend,"test 4","also me"
 """
    blocklist = BlocklistParserMastodonCSV().parse_blocklist(csvtext, "csvfile")

    assert len(blocklist) == 4
    for domain in ('example.org', 'example2.org', 'example3.org', 'example4.org'):
        assert domain in blocklist

    # (domain, attribute) pairs that must come back empty.
    blank_fields = (
        ('example.org', 'public_comment'),
        ('example.org', 'private_comment'),
        ('example3.org', 'public_comment'),
        ('example4.org', 'private_comment'),
    )
    for domain, field in blank_fields:
        assert getattr(blocklist[domain], field) == ''