Improved comment merging.

2023-01-12 07:39:17 +11:00 · 2023-01-12 07:39:17 +11:00 · 0a6eec4bf2
parent 10011a5ffb
commit 0a6eec4bf2
2 changed files with 112 additions and 9 deletions
--- a/src/fediblockhole/init.py
+++ b/src/fediblockhole/init.py
@ -151,16 +151,13 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
    # Default to the existing block definition
    blockdata = oldblock._asdict()

-    # If the public or private comment is different,
-    # append it to the existing comment, joined with ', '
-    # unless the comment is None or an empty string
+    # Merge comments
    keylist = ['public_comment', 'private_comment']
    for key in keylist:
        try:
-            if getattr(oldblock, key) not in ['', None] and getattr(newblock, key) not in ['', None] and getattr(oldblock, key) != getattr(newblock, key):
-                log.debug(f"old comment: '{getattr(oldblock, key)}'")
-                log.debug(f"new comment: '{getattr(newblock, key)}'")
-                blockdata[key] = ', '.join([getattr(oldblock, key), getattr(newblock, key)])
+            oldcomment = getattr(oldblock, key)
+            newcomment = getattr(newblock, key)
+            blockdata[key] = merge_comments(oldcomment, newcomment)
        except KeyError:
            log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
            continue
@ -198,6 +195,50 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str

    return DomainBlock(**blockdata)

+def merge_comments(oldcomment:str, newcomment:str) -> str:
+    """ Merge two comments
+
+    @param oldcomment: The original comment we're merging into
+    @param newcomment: The new commment we want to merge in
+    @returns: a new str of the merged comment
+    """
+    # Don't merge if both comments are None or ''
+    if oldcomment in ['', None] and newcomment in ['', None]:
+        return ''
+
+    # If both comments are the same, don't merge
+    if oldcomment == newcomment:
+        return oldcomment
+
+    # We want to skip duplicate fragments so we don't end up
+    # re-concatenating the same strings every time there's an
+    # update, causing the comment to grow without bound.
+    # We tokenize the comments, splitting them on ', ', and comparing
+    # the tokens, skipping duplicates.
+    # This means "boring, lack of moderation, nazis, scrapers" merging
+    # with "lack of moderation, scrapers" should result in
+    # "boring, lack of moderation, nazis, scrapers"
+    old_tokens = oldcomment.split(', ')
+    new_tokens = newcomment.split(', ')
+    
+    # Remove any empty string tokens that we get
+    while '' in old_tokens:
+        old_tokens.remove('')
+    while '' in new_tokens:
+        new_tokens.remove('')
+
+    # Remove duplicate tokens
+    for token in old_tokens:
+        if token in new_tokens:
+            new_tokens.remove(token)
+
+    # Combine whatever tokens are left into one set
+    tokenset = old_tokens
+    tokenset.extend(new_tokens)
+
+    # Return the merged string
+    return ', '.join(tokenset)
+
 def requests_headers(token: str=None):
    """Set common headers for requests"""
    headers = {
@ -323,7 +364,7 @@ def check_followed_severity(host: str, token: str, domain: str,
    # Return straight away if we're not increasing the severity
    if severity <= max_followed_severity:
        return severity
-        
+
    # If the instance has accounts that follow people on the to-be-blocked domain,
    # limit the maximum severity to the configured `max_followed_severity`.
    follows = fetch_instance_follows(token, host, domain)
--- a/tests/test_mergeplan.py
+++ b/tests/test_mergeplan.py
@ -2,7 +2,7 @@
 """

 from fediblockhole.blocklist_parser import parse_blocklist
-from fediblockhole import merge_blocklists
+from fediblockhole import merge_blocklists, merge_comments

 from fediblockhole.const import SeverityLevel

@ -137,3 +137,65 @@ def test_merge_duplicate_comments():
    # Nope, this breaks. Need to rethink duplicate comment merge.
    # assert bl['2diff-comment.example.org'].public_comment == 'Suspend comment 1, Public duplicate'

+def test_merge_comments_none():
+
+    a = None
+    b = None
+
+    r = merge_comments(a, b)
+
+    assert r == ''
+
+def test_merge_comments_empty():
+
+    a = ''
+    b = ''
+
+    r = merge_comments(a, b)
+
+    assert r == ''
+
+def test_merge_comments_left():
+
+    a = 'comment to merge'
+    b = ''
+
+    r = merge_comments(a, b)
+
+    assert r == 'comment to merge'
+
+def test_merge_comments_right():
+
+    a = ''
+    b = 'comment to merge'
+
+    r = merge_comments(a, b)
+
+    assert r == 'comment to merge'
+
+def test_merge_comments_same():
+
+    a = 'comment to merge'
+    b = 'comment to merge'
+
+    r = merge_comments(a, b)
+
+    assert r == 'comment to merge'
+
+def test_merge_comments_diff():
+
+    a = 'comment A'
+    b = 'comment B'
+
+    r = merge_comments(a, b)
+
+    assert r == 'comment A, comment B'
+
+def test_merge_comments_dups():
+
+    a = "boring, nazis, lack of moderation, flagged, special"
+    b = "spoon, nazis, flagged, lack of moderation, happy, fork"
+
+    r = merge_comments(a, b)
+
+    assert r == 'boring, nazis, lack of moderation, flagged, special, spoon, happy, fork'