From 9f95f143df63fdf18d165530674cd0fa11f6ed28 Mon Sep 17 00:00:00 2001
From: Justin Warren <justin@eigenmagic.com>
Date: Mon, 13 Feb 2023 10:22:33 +1100
Subject: [PATCH 1/2] Added parser for Mastodon 4.1 blocklist CSV format

---
 src/fediblockhole/blocklists.py   | 19 ++++++++
 tests/test_parser_csv_mastodon.py | 81 +++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 tests/test_parser_csv_mastodon.py

diff --git a/src/fediblockhole/blocklists.py b/src/fediblockhole/blocklists.py
index 7a9e44f..1edf886 100644
--- a/src/fediblockhole/blocklists.py
+++ b/src/fediblockhole/blocklists.py
@@ -160,6 +160,24 @@ class BlocklistParserCSV(BlocklistParser):
             block.severity = self.max_severity
         return block
 
+class BlocklistParserMastodonCSV(BlocklistParserCSV):
+    """ Parse Mastodon CSV formatted blocklists
+
+    The Mastodon v4.1.x domain block CSV export prefixes its field
+    names with a '#' character (e.g. '#domain'); it is stripped here.
+    """
+    do_preparse = True
+
+    def parse_item(self, blockitem: dict) -> DomainBlock:
+        """Build a DomainBlock from blockitem after un-#ing its keys."""
+        # Only string keys can carry a '#'; others (e.g. the None key that
+        # csv.DictReader gives surplus fields) are handed on untouched.
+        newdict = {
+            (key.lstrip('#') if isinstance(key, str) else key): value
+            for key, value in blockitem.items()
+        }
+        return super().parse_item(newdict)
+
 class RapidBlockParserCSV(BlocklistParserCSV):
     """ Parse RapidBlock CSV blocklists
 
@@ -223,6 +241,7 @@ def str2bool(boolstring: str) -> bool:
 
 FORMAT_PARSERS = {
     'csv': BlocklistParserCSV,
+    'mastodon_csv': BlocklistParserMastodonCSV,
     'json': BlocklistParserJSON,
     'mastodon_api_public': BlocklistParserMastodonAPIPublic,
     'rapidblock.csv': RapidBlockParserCSV,
diff --git a/tests/test_parser_csv_mastodon.py b/tests/test_parser_csv_mastodon.py
new file mode 100644
index 0000000..6e85c71
--- /dev/null
+++ b/tests/test_parser_csv_mastodon.py
@@ -0,0 +1,81 @@
+"""Tests of the CSV parsing
+"""
+
+from fediblockhole.blocklists import BlocklistParserMastodonCSV
+from fediblockhole.const import SeverityLevel
+
+
+def test_single_line():
+    """A single line with no data rows yields an empty blocklist."""
+    single_line = "example.org"
+    source = "csvfile"
+
+    blocks = BlocklistParserMastodonCSV().parse_blocklist(single_line, source)
+    assert len(blocks) == 0
+
+def test_header_only():
+    """A '#'-prefixed header row with no data rows yields no blocks."""
+    header = "#domain,#severity,#public_comment"
+    source = "csvfile"
+
+    blocks = BlocklistParserMastodonCSV().parse_blocklist(header, source)
+    assert len(blocks) == 0
+
+def test_2_blocks():
+    """Two data rows under an un-prefixed header give two blocks."""
+    csvdata = """domain,severity
+example.org,silence
+example2.org,suspend
+"""
+    source = "csvfile"
+
+    blocks = BlocklistParserMastodonCSV().parse_blocklist(csvdata, source)
+
+    assert len(blocks) == 2
+    assert 'example.org' in blocks
+
+def test_4_blocks():
+    """Rows under a Mastodon-style '#'-prefixed header parse correctly."""
+    csvdata = """#domain,#severity,#public_comment
+example.org,silence,"test 1"
+example2.org,suspend,"test 2"
+example3.org,noop,"test 3"
+example4.org,suspend,"test 4"
+"""
+    origin = "csvfile"
+
+    bl = BlocklistParserMastodonCSV().parse_blocklist(csvdata, origin)
+
+    assert len(bl) == 4
+    assert 'example.org' in bl
+    assert 'example2.org' in bl
+    assert 'example3.org' in bl
+    assert 'example4.org' in bl
+
+    assert bl['example.org'].severity.level == SeverityLevel.SILENCE
+    assert bl['example2.org'].severity.level == SeverityLevel.SUSPEND
+    assert bl['example3.org'].severity.level == SeverityLevel.NONE
+    assert bl['example4.org'].severity.level == SeverityLevel.SUSPEND
+
+def test_ignore_comments():
+    """Comment fields present in the CSV come back empty on the blocks."""
+    csvdata = """domain,severity,public_comment,private_comment
+example.org,silence,"test 1","ignore me"
+example2.org,suspend,"test 2","ignote me also"
+example3.org,noop,"test 3","and me"
+example4.org,suspend,"test 4","also me"
+"""
+    source = "csvfile"
+
+    blocks = BlocklistParserMastodonCSV().parse_blocklist(csvdata, source)
+
+    assert len(blocks) == 4
+    assert 'example.org' in blocks
+    assert 'example2.org' in blocks
+    assert 'example3.org' in blocks
+    assert 'example4.org' in blocks
+
+    assert bl_public_comment_is_empty(blocks, 'example.org') if False else blocks['example.org'].public_comment == ''
+    assert blocks['example.org'].private_comment == ''
+    assert blocks['example3.org'].public_comment == ''
+    assert blocks['example4.org'].private_comment == ''
\ No newline at end of file

From 77e7921e6392146751be52c569620c2090b0eaa2 Mon Sep 17 00:00:00 2001
From: Justin Warren <justin@eigenmagic.com>
Date: Mon, 13 Feb 2023 10:28:05 +1100
Subject: [PATCH 2/2] Updated documentation with Mastodon CSV format

---
 README.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/README.md b/README.md
index 5db678f..a0fc832 100644
--- a/README.md
+++ b/README.md
@@ -197,6 +197,7 @@ Supported formats are currently:
 
  - Comma-Separated Values (CSV)
  - JSON
+ - Mastodon v4.1 flavoured CSV
  - RapidBlock CSV
  - RapidBlock JSON
 
@@ -214,6 +215,17 @@ A CSV format blocklist must contain a header row with at least a `domain` and `s
 
 Optional fields, as listed about, may also be included.
 
+#### Mastodon v4.1 CSV format
+
+As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
+infinite wisdom, the Mastodon devs decided that field names should begin with a
+`#` character in the header, unlike the field names in the JSON output via the
+API… or in pretty much any other CSV file anywhere else.
+
+Setting the format to `mastodon_csv` will strip off the `#` character when
+parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
+other CSV formatted blocklist.
+
 #### JSON format
 
 JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.