222 lines
7.1 KiB
Plaintext
222 lines
7.1 KiB
Plaintext
|
#!/bin/bash
|
||
|
|
||
|
# Misskey Safety Scan
|
||
|
#
|
||
|
# Scans the descriptions of all federated instances using an LLM to
|
||
|
# analyze the text to determine if the instance is likely to host
|
||
|
# material considered inappropriate by most people.
|
||
|
#
|
||
|
# This doesn't mean the content on other instances is illegal or
|
||
|
# morally wrong, but it does mean that the instance is a good
|
||
|
# candidate for manual analysis by the local instance administrator.
|
||
|
#
|
||
|
# Usage Instructions:
|
||
|
#
|
||
|
# 1. Find your API key (the "i" parameter) from an API request in
|
||
|
# browser console.
|
||
|
# 2. Execute script like "scan-federated-instances <instance> <key> <model>"
|
||
|
# 3. Wait a really long time.
|
||
|
#
|
||
|
# Output:
|
||
|
# - scan-output.csv: Instances with descriptions considered inappropriate.
|
||
|
# - failures.csv: Instances whose safety status could not be determined.
|
||
|
#
|
||
|
# What to do with output:
|
||
|
#
|
||
|
# 1. Run scan-output.csv through the verify-scan script. This will
|
||
|
# double check descriptions.
|
||
|
#
|
||
|
# 2. Take appropriate action against instances that you feel promote
|
||
|
# content that is against your instance's terms of service, or are
|
||
|
# illegal.
|
||
|
#
|
||
|
# 3. Check failures.csv manually for failure reasons.
|
||
|
|
||
|
###### MAIN CONFIG ######
# Number of instances requested from the federation API per page.
LIMIT=100

# User information, taken from the positional arguments:
#   $1 - base URL of the local instance
#   $2 - API key (the "i" parameter of API requests)
#   $3 - model name handed to aichat via -m
export INSTANCE="$1"
export API_KEY="$2"
export LLM_MODEL="$3"
#########################

#########################
# Output files; evaluate_instance_safety appends rows to both.
export UNSAFE="scan-output.csv"
export FAILURES="failures.csv"
#########################

#### LLM INFORMATION ####
# Number of evaluations GNU parallel runs at the same time.
export MAX_REQUESTS=4

# Text placed in front of every description sent to the LLM. `read -d ''`
# reports a non-zero status at end of input even though the variable has
# been filled, so that status is deliberately ignored.
read -r -d '' DESCRIPTION_PROMPT <<'EOF' || true
Here is the description of a Mastodon instance.
Evaluate if it is safe or not:
EOF

# evaluate_instance_safety runs in child shells started by GNU parallel.
# Only exported variables reach those shells, so without this export the
# prompt would silently be empty there.
export DESCRIPTION_PROMPT
#########################
|
||
|
|
||
|
#######################################
# Ask the LLM about a prompt up to three times and combine the verdicts, to
# account for variance in LLM responses.
#
# Globals:   LLM_MODEL (read) - model name passed to aichat via -m
# Arguments: $1 - full prompt text, sent to aichat on stdin
# Outputs:   one of the following on stdout:
#              - "safe", as soon as any attempt answers "safe";
#              - "unsafe", a newline, and the last line of the first
#                response, when all three attempts answer "unsafe";
#              - otherwise the three raw responses joined as
#                "<r1>. <r2>. <r3>." with every newline removed.
#######################################
get_llm_response() {
  local prompt="$1"
  local -a responses=()
  local response verdict
  local all_unsafe=1

  for _ in 1 2 3; do
    # Only the first two lines of a reply (status, then code) are used.
    response=$(printf '%s\n' "$prompt" | aichat -S -m "$LLM_MODEL" | head -n 2)
    verdict="${response%%$'\n'*}"

    # If any response is safe, it's likely the description is actually
    # safe. The result is already decided at that point, so the remaining
    # (slow) LLM calls are skipped.
    if [[ "$verdict" == "safe" ]]; then
      echo "safe"
      return 0
    fi

    if [[ "$verdict" != "unsafe" ]]; then
      all_unsafe=0
    fi
    responses+=("$response")
  done

  if ((all_unsafe)); then
    # All responses unsafe: definitely unsafe. Report the code line of the
    # first response.
    printf 'unsafe\n%s\n' "${responses[0]##*$'\n'}"
  else
    # Unclear result: return all responses on a single line.
    printf '%s. %s. %s.' "${responses[@]}" | tr --delete '\n'
  fi
}

export -f get_llm_response
|
||
|
|
||
|
#######################################
# Classify one federated instance from its description and record the
# result.
#
# Globals:   DESCRIPTION_PROMPT (read), UNSAFE (appended to),
#            FAILURES (appended to)
# Arguments: $1 - instance id
#            $2 - instance host name
#            $3 - instance description
# Outputs:   a short human-readable report on stdout. Unsafe instances are
#            appended to $UNSAFE as id,host,code,"description"; instances
#            whose status is neither "safe" nor "unsafe" are appended to
#            $FAILURES as id,host,<raw status text>.
# Returns:   0 when the instance is skipped (empty id/host) or the host
#            does not respond.
#######################################
evaluate_instance_safety() {
  local id="$1"
  local host="$2"
  local description="$3"
  local display_desc write_desc prompt llm_output safety_code safety_status

  # Short single-line preview of the description for console output.
  display_desc=$(printf '%s\n' "${description:0:80}" | tr --delete '\n')

  # For some reason, we can wind up with empty ids and hosts. Do
  # nothing with those.
  if [[ -z "$id" || -z "$host" ]]; then
    return 0
  fi

  # if host does not respond, ignore it.
  if ! curl --max-time 3 -L -s -o /dev/null -XOPTIONS "https://$host"; then
    echo "[$id] $host"
    echo " ├Description: $display_desc"
    echo " └not responding"
    return 0
  fi

  # Ask the LLM; the first line of the answer is the status, the last line
  # is the code.
  prompt="$DESCRIPTION_PROMPT $description"
  llm_output=$(get_llm_response "$prompt")
  safety_status="${llm_output%%$'\n'*}"
  safety_code="${llm_output##*$'\n'}"

  # we can ignore certain unsafe codes, because they're not related
  # to inappropriate content. S6 = specialized advice, S7 = privacy,
  # S8 = intellectual property
  if [[ "$safety_status" == "unsafe" ]]; then
    case "$safety_code" in
      S6 | S7 | S8)
        safety_status="safe"
        safety_code=""
        ;;
    esac
  fi

  # Output readable info
  echo "[$id] $host"
  echo " ├Description: $display_desc"
  case "$safety_status" in
    safe)
      echo " └safe"
      ;;
    unsafe)
      # CSV rows are one line each, so newlines are dropped from the
      # description before it is written.
      write_desc="${description//$'\n'/}"
      echo " ├Code: $safety_code"
      echo " └unsafe"
      printf '%s,%s,%s,"%s"\n' "$id" "$host" "$safety_code" "$write_desc" >> "$UNSAFE"
      ;;
    *)
      echo " └unknown: $safety_status"
      printf '%s,%s,%s\n' "$id" "$host" "$safety_status" >> "$FAILURES"
      ;;
  esac
}

export -f evaluate_instance_safety
|
||
|
|
||
|
#######################################
# Signal handler: stop child processes and exit the script.
#
# Arguments: $1 - exit status to use (defaults to 1)
# Outputs:   "EXITING..." on stdout
#######################################
cleanup_and_exit() {
  local status="${1:-1}"
  local job_pids

  echo EXITING...

  # Only signal GNU parallel processes started by this script ($$ is the
  # script's own PID); other parallel runs of the same user are left alone.
  pkill -HUP -P "$$" -x parallel

  # Background jobs, if any. Word splitting of the PID list is intended.
  job_pids=$(jobs -p)
  if [[ -n "$job_pids" ]]; then
    # shellcheck disable=SC2086
    kill $job_pids 2> /dev/null
  fi

  exit "$status"
}

# Exit with the conventional 128+signal status.
trap 'cleanup_and_exit 130' INT
trap 'cleanup_and_exit 143' TERM
|
||
|
|
||
|
# Build the federation endpoint URL from INSTANCE. Every trailing slash is
# dropped first so the joined path never contains a doubled "/".
url="${INSTANCE}"
until [[ "${url}" != */ ]]; do
  url="${url%/}"
done
url="${url}/api/federation/instances"
|
||
|
|
||
|
# If START_OFFSET is set in the environment, we assume we're continuing
# from some previous failed invocation and keep the existing output files.
# Otherwise both output files are reset; as before, each then starts with a
# single empty line.
if [[ -n "$START_OFFSET" ]]; then
  echo "Starting at $START_OFFSET"
else
  # Quoted so that an output path containing whitespace is not rejected as
  # an ambiguous redirect.
  echo "" > "$UNSAFE"
  echo "" > "$FAILURES"
fi
|
||
|
|
||
|
# Calls the Misskey API to get at most <LIMIT> instances at a time and hands
# every instance that has a description to evaluate_instance_safety through
# GNU parallel. Ends with status 0 when the API returns an empty page, and
# with status 1 when a request fails or the reply is not a JSON array.
offset="${START_OFFSET:-0}"
while true; do
  # Let jq build the request body so the API key is JSON-escaped and a
  # non-numeric offset/limit is rejected before anything is sent.
  if ! payload=$(jq -n -c \
    --argjson offset "$offset" \
    --argjson limit "$LIMIT" \
    --arg key "$API_KEY" \
    '{offset: $offset, limit: $limit, i: $key, nsfw: false, silenced: false, blocked: false, notResponding: false}'); then
    echo "Invalid offset '$offset' or limit '$LIMIT'." >&2
    exit 1
  fi

  # The body is passed on stdin instead of on curl's command line.
  if ! response=$(curl -sS "$url" -X POST \
    -H 'Content-Type: application/json' \
    --data-binary @- <<< "$payload"); then
    echo "Request to $url failed at offset $offset." >&2
    echo "Resume later with START_OFFSET=$offset." >&2
    exit 1
  fi

  # An error reply is a JSON object, not an array. Treating it as a page of
  # instances would yield nothing to process and loop forever.
  if ! jq -e 'type == "array"' <<< "$response" > /dev/null 2>&1; then
    echo "Unexpected response from $url at offset $offset:" >&2
    printf '%s\n' "${response:0:500}" >&2
    echo "Resume later with START_OFFSET=$offset." >&2
    exit 1
  fi

  count=$(jq 'length' <<< "$response")
  if ((count == 0)); then
    echo "No more items to process. The list is empty."
    exit 0
  fi

  echo "Processing results from offset $offset..."

  # One compact JSON object per line, filtering out missing descriptions.
  instances=$(jq -c -r '.[] | select(.description != null)' <<< "$response")
  inputs=""

  # Builds up a CSV arg list for GNU parallel to execute multiple requests
  # against the LLM via aichat. The description has HTML stripped out by
  # w3m so that GNU parallel's CSV input works correctly.
  while IFS= read -r instance; do
    [[ -n "$instance" ]] || continue

    id=$(jq -r .id <<< "$instance")
    host=$(jq -r .host <<< "$instance")

    # Remove HTML, trim whitespace on every line, then drop the characters
    # <, > and " (a double quote would end the CSV field early).
    # NOTE(review): the original pipeline also ran sed 's/^//g', which
    # matches the empty string at the start of a line and changes nothing;
    # confirm whether removing a literal character was intended.
    description=$(jq -r .description <<< "$instance" \
      | w3m -dump -T text/html \
      | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e 's/[<>"]//g')

    # Only add to the arg list if we actually have something to check.
    if [[ -n "$description" ]]; then
      printf -v inputs '%s"%s","%s","%s"\n' "$inputs" "$id" "$host" "$description"
    fi
  done <<< "$instances"

  # Final cleanup and execution of LLM analysis. When no instance on this
  # page had a usable description there is nothing to run.
  inputs=$(sed '/^[[:blank:]]*$/ d' <<< "$inputs")
  if [[ -n "$inputs" ]]; then
    parallel -P "$MAX_REQUESTS" --csv evaluate_instance_safety ::: "$inputs"
  fi

  # Increment the offset for the next API request.
  ((offset += LIMIT))
done
|