From 9f95f143df63fdf18d165530674cd0fa11f6ed28 Mon Sep 17 00:00:00 2001
From: Justin Warren
Date: Mon, 13 Feb 2023 10:22:33 +1100
Subject: [PATCH 1/2] Added parser for Mastodon 4.1 blocklist CSV format

---
 src/fediblockhole/blocklists.py   | 19 ++++++++
 tests/test_parser_csv_mastodon.py | 81 +++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 tests/test_parser_csv_mastodon.py

diff --git a/src/fediblockhole/blocklists.py b/src/fediblockhole/blocklists.py
index 7a9e44f..1edf886 100644
--- a/src/fediblockhole/blocklists.py
+++ b/src/fediblockhole/blocklists.py
@@ -160,6 +160,24 @@ class BlocklistParserCSV(BlocklistParser):
             block.severity = self.max_severity
         return block
 
+class BlocklistParserMastodonCSV(BlocklistParserCSV):
+    """ Parse Mastodon CSV formatted blocklists
+
+    The Mastodon v4.1.x domain block CSV export prefixes its
+    field names with a '#' character because… reasons?
+    """
+    do_preparse = True
+
+    def parse_item(self, blockitem: dict) -> DomainBlock:
+        """Build a new blockitem dict with new un-#ed keys
+        """
+        newdict = {}
+        for key in blockitem:
+            newkey = key.lstrip('#')
+            newdict[newkey] = blockitem[key]
+
+        return super().parse_item(newdict)
+
 class RapidBlockParserCSV(BlocklistParserCSV):
     """ Parse RapidBlock CSV blocklists
 
@@ -223,6 +241,7 @@ def str2bool(boolstring: str) -> bool:
 
 FORMAT_PARSERS = {
     'csv': BlocklistParserCSV,
+    'mastodon_csv': BlocklistParserMastodonCSV,
     'json': BlocklistParserJSON,
     'mastodon_api_public': BlocklistParserMastodonAPIPublic,
     'rapidblock.csv': RapidBlockParserCSV,
diff --git a/tests/test_parser_csv_mastodon.py b/tests/test_parser_csv_mastodon.py
new file mode 100644
index 0000000..6e85c71
--- /dev/null
+++ b/tests/test_parser_csv_mastodon.py
@@ -0,0 +1,81 @@
+"""Tests of the Mastodon CSV parsing
+"""
+
+from fediblockhole.blocklists import BlocklistParserMastodonCSV
+from fediblockhole.const import SeverityLevel
+
+
+def test_single_line():
+    csvdata = "example.org"
+    origin = "csvfile"
+
+    parser = BlocklistParserMastodonCSV()
+    bl = parser.parse_blocklist(csvdata, origin)
+    assert len(bl) == 0
+
+def test_header_only():
+    csvdata = "#domain,#severity,#public_comment"
+    origin = "csvfile"
+
+    parser = BlocklistParserMastodonCSV()
+    bl = parser.parse_blocklist(csvdata, origin)
+    assert len(bl) == 0
+
+def test_2_blocks():
+    csvdata = """domain,severity
+example.org,silence
+example2.org,suspend
+"""
+    origin = "csvfile"
+
+    parser = BlocklistParserMastodonCSV()
+    bl = parser.parse_blocklist(csvdata, origin)
+
+    assert len(bl) == 2
+    assert 'example.org' in bl
+
+def test_4_blocks():
+    csvdata = """#domain,#severity,#public_comment
+example.org,silence,"test 1"
+example2.org,suspend,"test 2"
+example3.org,noop,"test 3"
+example4.org,suspend,"test 4"
+"""
+    origin = "csvfile"
+
+    parser = BlocklistParserMastodonCSV()
+    bl = parser.parse_blocklist(csvdata, origin)
+
+    assert len(bl) == 4
+    assert 'example.org' in bl
+    assert 'example2.org' in bl
+    assert 'example3.org' in bl
+    assert 'example4.org' in bl
+
+    assert bl['example.org'].severity.level == SeverityLevel.SILENCE
+    assert bl['example2.org'].severity.level == SeverityLevel.SUSPEND
+    assert bl['example3.org'].severity.level == SeverityLevel.NONE
+    assert bl['example4.org'].severity.level == SeverityLevel.SUSPEND
+
+def test_ignore_comments():
+    csvdata = """domain,severity,public_comment,private_comment
+example.org,silence,"test 1","ignore me"
+example2.org,suspend,"test 2","ignote me also"
+example3.org,noop,"test 3","and me"
+example4.org,suspend,"test 4","also me"
+"""
+    origin = "csvfile"
+
+    parser = BlocklistParserMastodonCSV()
+    bl = parser.parse_blocklist(csvdata, origin)
+
+    assert len(bl) == 4
+    assert 'example.org' in bl
+    assert 'example2.org' in bl
+    assert 'example3.org' in bl
+    assert 'example4.org' in bl
+
+    assert bl['example.org'].public_comment == ''
+    assert bl['example.org'].private_comment == ''
+    assert bl['example3.org'].public_comment == ''
+    assert bl['example4.org'].private_comment == ''

From 77e7921e6392146751be52c569620c2090b0eaa2 Mon Sep 17 00:00:00 2001
From: Justin Warren
Date: Mon, 13 Feb 2023 10:28:05 +1100
Subject: [PATCH 2/2] Updated documentation with Mastodon CSV format

---
 README.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/README.md b/README.md
index 5db678f..a0fc832 100644
--- a/README.md
+++ b/README.md
@@ -197,6 +197,7 @@ Supported formats are currently:
 
   - Comma-Separated Values (CSV)
   - JSON
+  - Mastodon v4.1 flavoured CSV
   - RapidBlock CSV
   - RapidBlock JSON
 
@@ -214,6 +215,17 @@ A CSV format blocklist must contain a header row with at least a `domain` and `s
 
 Optional fields, as listed about, may also be included.
 
+#### Mastodon v4.1 CSV format
+
+As of v4.1.0, Mastodon can export domain blocks as a CSV file. However, in their
+infinite wisdom, the Mastodon devs decided that field names should begin with a
+`#` character in the header, unlike the field names in the JSON output via the
+API… or in pretty much any other CSV file anywhere else.
+
+Setting the format to `mastodon_csv` will strip off the `#` character when
+parsing and FediBlockHole can then use Mastodon v4.1 CSV blocklists like any
+other CSV formatted blocklist.
+
 #### JSON format
 
 JSON is also supported. It uses the same format as the JSON returned from the Mastodon API.