Improved comment merging.

This commit is contained in:
Justin Warren 2023-01-12 07:39:17 +11:00
parent 10011a5ffb
commit 0a6eec4bf2
No known key found for this signature in database
2 changed files with 112 additions and 9 deletions

View File

@ -151,16 +151,13 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
# Default to the existing block definition # Default to the existing block definition
blockdata = oldblock._asdict() blockdata = oldblock._asdict()
# If the public or private comment is different, # Merge comments
# append it to the existing comment, joined with ', '
# unless the comment is None or an empty string
keylist = ['public_comment', 'private_comment'] keylist = ['public_comment', 'private_comment']
for key in keylist: for key in keylist:
try: try:
if getattr(oldblock, key) not in ['', None] and getattr(newblock, key) not in ['', None] and getattr(oldblock, key) != getattr(newblock, key): oldcomment = getattr(oldblock, key)
log.debug(f"old comment: '{getattr(oldblock, key)}'") newcomment = getattr(newblock, key)
log.debug(f"new comment: '{getattr(newblock, key)}'") blockdata[key] = merge_comments(oldcomment, newcomment)
blockdata[key] = ', '.join([getattr(oldblock, key), getattr(newblock, key)])
except KeyError: except KeyError:
log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...") log.debug(f"Key '{key}' missing from block definition so cannot compare. Continuing...")
continue continue
@ -198,6 +195,50 @@ def apply_mergeplan(oldblock: DomainBlock, newblock: DomainBlock, mergeplan: str
return DomainBlock(**blockdata) return DomainBlock(**blockdata)
def merge_comments(oldcomment:str, newcomment:str) -> str:
""" Merge two comments
@param oldcomment: The original comment we're merging into
@param newcomment: The new commment we want to merge in
@returns: a new str of the merged comment
"""
# Don't merge if both comments are None or ''
if oldcomment in ['', None] and newcomment in ['', None]:
return ''
# If both comments are the same, don't merge
if oldcomment == newcomment:
return oldcomment
# We want to skip duplicate fragments so we don't end up
# re-concatenating the same strings every time there's an
# update, causing the comment to grow without bound.
# We tokenize the comments, splitting them on ', ', and comparing
# the tokens, skipping duplicates.
# This means "boring, lack of moderation, nazis, scrapers" merging
# with "lack of moderation, scrapers" should result in
# "boring, lack of moderation, nazis, scrapers"
old_tokens = oldcomment.split(', ')
new_tokens = newcomment.split(', ')
# Remove any empty string tokens that we get
while '' in old_tokens:
old_tokens.remove('')
while '' in new_tokens:
new_tokens.remove('')
# Remove duplicate tokens
for token in old_tokens:
if token in new_tokens:
new_tokens.remove(token)
# Combine whatever tokens are left into one set
tokenset = old_tokens
tokenset.extend(new_tokens)
# Return the merged string
return ', '.join(tokenset)
def requests_headers(token: str=None): def requests_headers(token: str=None):
"""Set common headers for requests""" """Set common headers for requests"""
headers = { headers = {
@ -323,7 +364,7 @@ def check_followed_severity(host: str, token: str, domain: str,
# Return straight away if we're not increasing the severity # Return straight away if we're not increasing the severity
if severity <= max_followed_severity: if severity <= max_followed_severity:
return severity return severity
# If the instance has accounts that follow people on the to-be-blocked domain, # If the instance has accounts that follow people on the to-be-blocked domain,
# limit the maximum severity to the configured `max_followed_severity`. # limit the maximum severity to the configured `max_followed_severity`.
follows = fetch_instance_follows(token, host, domain) follows = fetch_instance_follows(token, host, domain)

View File

@ -2,7 +2,7 @@
""" """
from fediblockhole.blocklist_parser import parse_blocklist from fediblockhole.blocklist_parser import parse_blocklist
from fediblockhole import merge_blocklists from fediblockhole import merge_blocklists, merge_comments
from fediblockhole.const import SeverityLevel from fediblockhole.const import SeverityLevel
@ -137,3 +137,65 @@ def test_merge_duplicate_comments():
# Nope, this breaks. Need to rethink duplicate comment merge. # Nope, this breaks. Need to rethink duplicate comment merge.
# assert bl['2diff-comment.example.org'].public_comment == 'Suspend comment 1, Public duplicate' # assert bl['2diff-comment.example.org'].public_comment == 'Suspend comment 1, Public duplicate'
def test_merge_comments_none():
a = None
b = None
r = merge_comments(a, b)
assert r == ''
def test_merge_comments_empty():
a = ''
b = ''
r = merge_comments(a, b)
assert r == ''
def test_merge_comments_left():
a = 'comment to merge'
b = ''
r = merge_comments(a, b)
assert r == 'comment to merge'
def test_merge_comments_right():
a = ''
b = 'comment to merge'
r = merge_comments(a, b)
assert r == 'comment to merge'
def test_merge_comments_same():
a = 'comment to merge'
b = 'comment to merge'
r = merge_comments(a, b)
assert r == 'comment to merge'
def test_merge_comments_diff():
a = 'comment A'
b = 'comment B'
r = merge_comments(a, b)
assert r == 'comment A, comment B'
def test_merge_comments_dups():
a = "boring, nazis, lack of moderation, flagged, special"
b = "spoon, nazis, flagged, lack of moderation, happy, fork"
r = merge_comments(a, b)
assert r == 'boring, nazis, lack of moderation, flagged, special, spoon, happy, fork'