From 0a6eec4bf299ac52edf4d3705c7ff04f54713daa Mon Sep 17 00:00:00 2001 From: Justin Warren Date: Thu, 12 Jan 2023 07:39:17 +1100 Subject: [PATCH] Improved comment merging. --- src/fediblockhole/__init__.py | 57 ++++++++++++++++++++++++++----- tests/test_mergeplan.py | 64 ++++++++++++++++++++++++++++++++++- 2 files changed, 112 insertions(+), 9 deletions(-) diff --git a/src/fediblockhole/__init__.py b/src/fediblockhole/__init__.py index 3c5a5c5..f79b7d6 100755 --- a/src/fediblockhole/__init__.py +++ b/src/fediblockhole/__init__.py @@ -151,16 +151,13 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str # Default to the existing block definition blockdata = oldblock._asdict() - # If the public or private comment is different, - # append it to the existing comment, joined with ', ' - # unless the comment is None or an empty string + # Merge comments keylist = ['public_comment', 'private_comment'] for key in keylist: try: - if getattr(oldblock, key) not in ['', None] and getattr(newblock, key) not in ['', None] and getattr(oldblock, key) != getattr(newblock, key): - log.debug(f"old comment: '{getattr(oldblock, key)}'") - log.debug(f"new comment: '{getattr(newblock, key)}'") - blockdata[key] = ', '.join([getattr(oldblock, key), getattr(newblock, key)]) + oldcomment = getattr(oldblock, key) + newcomment = getattr(newblock, key) + blockdata[key] = merge_comments(oldcomment, newcomment) except KeyError: log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...") continue @@ -198,6 +195,50 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str return DomainBlock(**blockdata) +def merge_comments(oldcomment:str, newcomment:str) -> str: + """ Merge two comments + + @param oldcomment: The original comment we're merging into + @param newcomment: The new commment we want to merge in + @returns: a new str of the merged comment + """ + # Don't merge if both comments are None or '' + if oldcomment in ['', None] and newcomment in ['', None]: + return '' + + # If both comments are the same, don't merge + if oldcomment == newcomment: + return oldcomment + + # We want to skip duplicate fragments so we don't end up + # re-concatenating the same strings every time there's an + # update, causing the comment to grow without bound. + # We tokenize the comments, splitting them on ', ', and comparing + # the tokens, skipping duplicates. + # This means "boring, lack of moderation, nazis, scrapers" merging + # with "lack of moderation, scrapers" should result in + # "boring, lack of moderation, nazis, scrapers" + old_tokens = oldcomment.split(', ') + new_tokens = newcomment.split(', ') + + # Remove any empty string tokens that we get + while '' in old_tokens: + old_tokens.remove('') + while '' in new_tokens: + new_tokens.remove('') + + # Remove duplicate tokens + for token in old_tokens: + if token in new_tokens: + new_tokens.remove(token) + + # Combine whatever tokens are left into one set + tokenset = old_tokens + tokenset.extend(new_tokens) + + # Return the merged string + return ', '.join(tokenset) + def requests_headers(token: str=None): """Set common headers for requests""" headers = { @@ -323,7 +364,7 @@ def check_followed_severity(host: str, token: str, domain: str, # Return straight away if we're not increasing the severity if severity <= max_followed_severity: return severity - + # If the instance has accounts that follow people on the to-be-blocked domain, # limit the maximum severity to the configured `max_followed_severity`. follows = fetch_instance_follows(token, host, domain) diff --git a/tests/test_mergeplan.py b/tests/test_mergeplan.py index 16b2bca..142f1da 100644 --- a/tests/test_mergeplan.py +++ b/tests/test_mergeplan.py @@ -2,7 +2,7 @@ """ from fediblockhole.blocklist_parser import parse_blocklist -from fediblockhole import merge_blocklists +from fediblockhole import merge_blocklists, merge_comments from fediblockhole.const import SeverityLevel @@ -137,3 +137,65 @@ def test_merge_duplicate_comments(): # Nope, this breaks. Need to rethink duplicate comment merge. # assert bl['2diff-comment.example.org'].public_comment == 'Suspend comment 1, Public duplicate' +def test_merge_comments_none(): + + a = None + b = None + + r = merge_comments(a, b) + + assert r == '' + +def test_merge_comments_empty(): + + a = '' + b = '' + + r = merge_comments(a, b) + + assert r == '' + +def test_merge_comments_left(): + + a = 'comment to merge' + b = '' + + r = merge_comments(a, b) + + assert r == 'comment to merge' + +def test_merge_comments_right(): + + a = '' + b = 'comment to merge' + + r = merge_comments(a, b) + + assert r == 'comment to merge' + +def test_merge_comments_same(): + + a = 'comment to merge' + b = 'comment to merge' + + r = merge_comments(a, b) + + assert r == 'comment to merge' + +def test_merge_comments_diff(): + + a = 'comment A' + b = 'comment B' + + r = merge_comments(a, b) + + assert r == 'comment A, comment B' + +def test_merge_comments_dups(): + + a = "boring, nazis, lack of moderation, flagged, special" + b = "spoon, nazis, flagged, lack of moderation, happy, fork" + + r = merge_comments(a, b) + + assert r == 'boring, nazis, lack of moderation, flagged, special, spoon, happy, fork' \ No newline at end of file