misskey-safety-scan/scan-federated-instances

#!/bin/bash

# Misskey Safety Scan
#
# Scans the descriptions of all federated instances using an LLM to
# analyze the text to determine if the instance is likely to host
# material considered inappropriate by most people.
#
# This doesn't mean the content on other instances is illegal or
# morally wrong, but it does mean that the instance is a good
# candidate for manual analysis by the local instance administrator.
#
# Usage Instructions:
#
#  1. Find your API key (the "i" parameter) from an API request in
#     browser console.
#  2. Execute script like "scan-federated-instances <instance> <key> <model>"
#  3. Wait a really long time.
#
# Output:
#  - scan-output.csv: Instances with descriptions considered inappropriate.
#  - failures.csv: Instances whose result could not be determined.
#
# What to do with output:
#
#  1. Run scan-output.csv through the verify-scan script. This will
#     double check descriptions.
#
#  2. Take appropriate action against instances that you feel promote
#     content that is against your instance's terms of service, or are
#     illegal.
#
#  3. Check failures.csv manually for failure reasons.

###### MAIN CONFIG ######
# Page size: number of instances requested from the Misskey API per call.
LIMIT=100
# User information, taken from the positional arguments. Exported so
# that the shells started by GNU parallel can read them.
export INSTANCE="$1"
export API_KEY="$2"
export LLM_MODEL="$3"
#########################

##### OUTPUT FILES ######
# CSV of instances judged unsafe, and CSV of unclear results.
export UNSAFE="scan-output.csv"
export FAILURES="failures.csv"
#########################

#### LLM INFORMATION ####
# Number of instances evaluated concurrently by GNU parallel.
export MAX_REQUESTS=4

# Text placed in front of every description sent to the model.
# `read -d ''` consumes the whole here-document; it returns non-zero at
# end of input, which is expected here.
read -r -d '' DESCRIPTION_PROMPT <<'EOF'
Here is the description of a Mastodon instance.
Evaluate if it is safe or not:
EOF
# The prompt is consumed inside evaluate_instance_safety, which runs in
# shells started by GNU parallel; without the export those shells would
# see an empty prompt.
export DESCRIPTION_PROMPT
#########################

#######################################
# Classify a prompt by running inference up to three times, to account
# for variance in LLM responses.
#
# Globals:
#   LLM_MODEL (read) - model name passed to `aichat -m`
# Arguments:
#   $1 - full prompt text, sent to aichat on standard input
# Outputs (stdout):
#   "safe"            as soon as the first line of any response is "safe"
#   "unsafe" + code   (two lines) when the first line of all three
#                     responses is "unsafe"; the code is the last line
#                     of the first response
#   otherwise         the three responses (first two lines of each)
#                     joined as "<r1>. <r2>. <r3>." with every newline
#                     removed
#######################################
get_llm_response() {
    local prompt="$1"
    local -a responses=()
    local response verdict combined attempt
    local unsafe_count=0

    for attempt in 1 2 3; do
        # Only the first two lines are used: verdict, then category code.
        response=$(printf '%s\n' "$prompt" | aichat -S -m "$LLM_MODEL" | head -n 2)
        verdict="${response%%$'\n'*}"

        # If any response is safe, it's likely the description is
        # actually safe. The remaining runs could not change that
        # result, so skip them.
        if [[ "$verdict" == "safe" ]]; then
            echo "safe"
            return 0
        fi

        if [[ "$verdict" == "unsafe" ]]; then
            unsafe_count=$((unsafe_count + 1))
        fi
        responses+=("$response")
    done

    if ((unsafe_count == 3)); then
        # All responses unsafe: definitely unsafe. Report the category
        # code (last line) of the first response.
        printf 'unsafe\n%s\n' "${responses[0]##*$'\n'}"
    else
        # Unclear result: return all responses flattened onto one line.
        combined="${responses[0]}. ${responses[1]}. ${responses[2]}."
        printf '%s' "${combined//$'\n'/}"
    fi
}

export -f get_llm_response

#######################################
# Evaluate one federated instance and record the result.
#
# Globals:
#   DESCRIPTION_PROMPT (read) - text placed in front of the description
#   UNSAFE (read)   - CSV file that unsafe instances are appended to
#   FAILURES (read) - CSV file that unclear results are appended to
# Arguments:
#   $1 - instance id
#   $2 - instance host name
#   $3 - instance description
# Outputs:
#   A short human-readable report on stdout. Unsafe instances are
#   appended to $UNSAFE as: id,host,code,"description". Unclear results
#   are appended to $FAILURES as: id,host,response.
# Returns:
#   0 when the instance is skipped (empty id/host, or host not
#   responding).
#######################################
evaluate_instance_safety() {
    local id="$1"
    local host="$2"
    local description="$3"
    local display_desc prompt llm_output safety_code safety_status write_desc

    # For some reason, we can wind up with empty ids and hosts. Do
    # nothing with those.
    if [[ -z "$id" || -z "$host" ]]; then
        return 0
    fi

    # First 80 characters of the description, flattened to one line,
    # for the console report.
    display_desc="${description:0:80}"
    display_desc="${display_desc//$'\n'/}"

    # if host does not respond, ignore it.
    if ! curl --max-time 3 -L -s -o /dev/null -XOPTIONS "https://$host"; then
        echo "[$id] $host"
        echo " ├Description: $display_desc"
        echo " └not responding"
        return 0
    fi

    # Ask the LLM about the description. The first line of the answer
    # is the status, the last line is the category code.
    prompt="$DESCRIPTION_PROMPT $description"
    llm_output=$(get_llm_response "$prompt")

    safety_status="${llm_output%%$'\n'*}"
    safety_code="${llm_output##*$'\n'}"

    # we can ignore certain unsafe codes, because they're not related
    # to inappropriate content. S6 = specialized advice, S7 = privacy,
    # S8 = intellectual property
    if [[ "$safety_status" == "unsafe" ]]; then
        case "$safety_code" in
            S6 | S7 | S8)
                safety_status="safe"
                safety_code=""
                ;;
        esac
    fi

    # Output readable info
    echo "[$id] $host"
    echo " ├Description: $display_desc"
    case "$safety_status" in
        safe)
            echo " └safe"
            ;;
        unsafe)
            # The CSV row carries the whole description on one line.
            write_desc="${description//$'\n'/}"
            echo " ├Code: $safety_code"
            echo " └unsafe"
            printf '%s,%s,%s,"%s"\n' \
                "$id" "$host" "$safety_code" "$write_desc" >> "$UNSAFE"
            ;;
        *)
            echo " └unknown: $safety_status"
            printf '%s,%s,%s\n' "$id" "$host" "$safety_status" >> "$FAILURES"
            ;;
    esac
}

export -f evaluate_instance_safety

# Main flow: page through the Misskey federation API, clean up each
# instance description, and evaluate the instances of every page with
# GNU parallel.
#
# Environment:
#   START_OFFSET - optional; resume from this offset and keep the
#                  existing output files instead of clearing them.

# Refuse to run without the three required arguments, so that an
# incomplete invocation cannot clear the output files or send requests
# to an empty URL.
if [[ -z "$INSTANCE" || -z "$API_KEY" || -z "$LLM_MODEL" ]]; then
    echo "Usage: ${0##*/} <instance> <key> <model>" >&2
    exit 2
fi

# Fail early if one of the external tools used below is missing.
for required_tool in curl jq w3m parallel aichat; do
    if ! command -v "$required_tool" > /dev/null; then
        echo "Missing required command: $required_tool" >&2
        exit 1
    fi
done

# The offset is inserted into the JSON request as a number, so it must
# be one. Base 10 is forced so a value like "08" is not read as octal.
offset="${START_OFFSET:-0}"
if [[ ! "$offset" =~ ^[0-9]+$ ]]; then
    echo "START_OFFSET must be a non-negative integer (got '$offset')" >&2
    exit 2
fi
offset=$((10#$offset))

# Signal handler to kill child processes and exit the script.
# NOTE(review): killall signals every process named "parallel" that
# this user is allowed to signal, not only the one started here.
on_interrupt() {
    local job_pids
    echo "EXITING..."
    killall -HUP parallel
    job_pids=$(jobs -p)
    # kill without arguments is a usage error, so only call it when
    # there is something to kill.
    if [[ -n "$job_pids" ]]; then
        # shellcheck disable=SC2086 # one word per PID is intended
        kill $job_pids
    fi
    exit 1
}
trap on_interrupt INT TERM

# Sanitize instance URL to sane URL formatting (no trailing slashes).
url="$INSTANCE"
while [[ "$url" == */ ]]; do
    url="${url%/}"
done
url="$url/api/federation/instances"

# If we are with a different start offset, we assume we're continuing
# from some previous failed invocation. Otherwise, we clear out the
# output files (truncated to empty, so the CSVs do not start with a
# blank record).
if [[ -n "$START_OFFSET" ]]; then
    echo "Starting at $START_OFFSET"
else
    : > "$UNSAFE"
    : > "$FAILURES"
fi

# Calls the Misskey API to get at most <LIMIT> instances at a time
while true; do
    # Build the request with jq so the API key is JSON-escaped rather
    # than pasted into a hand-written JSON string.
    if ! request_body=$(jq -n -c \
        --argjson page_offset "$offset" \
        --argjson page_limit "$LIMIT" \
        --arg api_key "$API_KEY" \
        '{"offset": $page_offset, "limit": $page_limit, "i": $api_key, "nsfw": false, "silenced": false, "blocked": false, "notResponding": false}'); then
        echo "Could not build the API request body." >&2
        exit 1
    fi

    if ! response=$(curl -s "$url" -X POST \
        -H 'Content-Type: application/json' \
        --data-raw "$request_body"); then
        echo "Request to $url failed at offset $offset." >&2
        exit 1
    fi

    # Anything other than a JSON array (an error object, an HTML error
    # page, an empty body) is a failure, not the end of the list. An
    # error object has a non-zero length and would otherwise keep this
    # loop running forever.
    if ! jq -e 'type == "array"' <<< "$response" > /dev/null 2>&1; then
        echo "Unexpected API response at offset $offset:" >&2
        printf '%s\n' "${response:0:500}" >&2
        exit 1
    fi

    if [[ $(jq 'length' <<< "$response") -eq 0 ]]; then
        echo "No more items to process. The list is empty."
        exit 0
    fi

    echo "Processing results from offset $offset..."

    # One compact JSON object per line (filtering out missing descriptions)
    instances=$(jq -c -r '.[] | select(.description != null)' <<< "$response")
    inputs=""

    # Builds up an arg list for GNU parallel to execute multiple
    # requests against LLM via aichat. Description has HTML
    # stripped out by w3m so that GNU parallel's CSV input works
    # correctly.
    while IFS= read -r instance; do
        # An empty here-string still yields one empty line.
        [[ -n "$instance" ]] || continue

        id=$(jq -r .id <<< "$instance")
        host=$(jq -r .host <<< "$instance")

        # remove HTML, quotes, and angle brackets from description,
        # trim whitespace on every line, and only add to arg list
        # if we actually have something to check.
        # NOTE(review): an earlier 's/^//g' expression in this cleanup
        # matched only the empty string at line start, i.e. it changed
        # nothing, and was dropped. If stripping literal '^' characters
        # was intended, add -e 's/\^//g'.
        description=$(jq -r .description <<< "$instance" \
            | w3m -dump -T text/html \
            | sed -e 's/^[[:space:]]*//' \
                -e 's/[[:space:]]*$//' \
                -e 's/[<>"]//g')

        if [[ -n "$description" ]]; then
            inputs+="\"$id\",\"$host\",\"$description\""$'\n'
        fi
    done <<< "$instances"

    # final cleanup (drop blank lines) and execution of LLM analysis.
    inputs=$(printf '%s\n' "$inputs" | sed '/^[[:blank:]]*$/ d')
    if [[ -n "$inputs" ]]; then
        parallel -P "$MAX_REQUESTS" --csv evaluate_instance_safety ::: "$inputs"
    fi

    # increment the offset for the next API request
    offset=$((offset + LIMIT))
done
Initial commit 2024-10-17 14:55:26 +00:00			`#!/bin/bash`

			`# Misskey Safety Scan`
			`#`
			`# Scans the descriptions of all federated instances using an LLM to`
			`# analyze the text to determine if the instance is likely to host`
			`# material considered inappropriate by most people.`
			`#`
			`# This doesn't mean the content on other instances is illegal or`
			`# morally wrong, but it does mean that the instance is a good`
			`# candidate for manual analysis by the local instance administrator.`
			`#`
			`# Usage Instructions:`
			`#`
			`# 1. Find your API key (the "i" parameter) from an API request in`
			`# browser console.`
			`# 2. Execute script like "scan-federated-instances <instance> <key> <model>"`
			`# 3. Wait a really long time.`
			`#`
			`# Output:`
			`# - unsafe.csv: Instances with descriptions considered inappropriate.`
			`# - failures.csv: Instances that whose output could not be determined.`
			`#`
			`# What to do with output:`
			`#`
			`# 1. Run scan-output.csv through the verify-scan script. This will`
			`# double check descriptions.`
			`#`
			`# 2. Take appropriate action against instances that you feel promote`
			`# content that is against your instance's terms of service, or are`
			`# illegal.`
			`#`
			`# 3. Check failures.csv manually for failure reasons.`

			`###### MAIN CONFIG ######`
			`LIMIT=100`
			`# User information`
			`export INSTANCE="$1"`
			`export API_KEY="$2"`
			`export LLM_MODEL="$3"`
			`#########################`

			`#########################`
			`export UNSAFE="scan-output.csv"`
			`export FAILURES="failures.csv"`
			`#########################`

			`#### LLM INFORMATION ####`
			`export MAX_REQUESTS=4`

			`read -r -d '' DESCRIPTION_PROMPT <<'EOF'`
			`Here is the description of a Mastodon instance.`
			`Evaluate if it is safe or not:`
			`EOF`
			`#########################`

			`# Run inference multiple times to account for variance in LLM`
			`# responses.`
			`get_llm_response() {`
			`prompt="$1"`
			`llm_output1=$(echo "$prompt" \| aichat -S -m $LLM_MODEL \| head -n 2)`
			`llm_output2=$(echo "$prompt" \| aichat -S -m $LLM_MODEL \| head -n 2)`
			`llm_output3=$(echo "$prompt" \| aichat -S -m $LLM_MODEL \| head -n 2)`

			`safety_code1=$(echo "$llm_output1" \| tail -n 1)`
			`safety_status1=$(echo "$llm_output1" \| head -n 1)`

			`safety_code2=$(echo "$llm_output2" \| tail -n 1)`
			`safety_status2=$(echo "$llm_output2" \| head -n 1)`

			`safety_code3=$(echo "$llm_output3" \| tail -n 1)`
			`safety_status3=$(echo "$llm_output3" \| head -n 1)`

			`# If any response is safe, it's likely the description is actually`
			`# safe.`
			`if [[ "$safety_status1" == "safe" \|\|`
			`"$safety_status2" == "safe" \|\|`
			`"$safety_status3" == "safe" ]]; then`
			`echo "safe"`
			`else`
			`# if all responses unsafe, then definitely unsafe. otherwise,`
			`# unclear response (return all responses).`
			`if [[ "$safety_status1" == "unsafe" &&`
			`"$safety_status2" == "unsafe" &&`
			`"$safety_status3" == "unsafe" ]]; then`
			`echo -e "unsafe\n${safety_code1}"`
			`else`
			`echo "${llm_output1}. ${llm_output2}. ${llm_output3}." \| tr --delete '\n'`
			`fi`
			`fi`
			`}`

			`export -f get_llm_response`

			`evaluate_instance_safety() {`
			`local id="$1"`
			`local host="$2"`
			`local description="$3"`

			`display_desc=$(echo "${description:0:80}" \| tr --delete '\n')`

			`# For some reason, we can wind up with empty ids and hosts. Do`
			`# nothing with those.`
			`if [[ -z "$id" \|\| -z "$host" ]]; then`
			`return 0`
			`fi`

			`# if host does not respond, ignore it.`
			`curl --max-time 3 -L -s -o /dev/null -XOPTIONS "https://$host"`
			`if [[ $? != 0 ]]; then`
			`echo "[$id] $host"`
			`echo " ├Description: $display_desc"`
			`echo " └not responding"`
			`return 0`
			`fi`

			`# Call aichat with the description and capture the output`
			`prompt="$DESCRIPTION_PROMPT $description"`
			`llm_output=$(get_llm_response "$prompt")`

			`safety_code=$(echo "$llm_output" \| tail -n 1)`
			`safety_status=$(echo "$llm_output" \| head -n 1)`

			`# we can ignore certain unsafe codes, because they're not related`
			`# to inappropriate content. S6 = specialized advice, S7 = privacy,`
			`# S8 = intellectual property`
			`if [[ "$safety_status" == "unsafe" ]]; then`
			`if [[ "$safety_code" == "S6" \|\|`
			`"$safety_code" == "S7" \|\|`
			`"$safety_code" == "S8" ]]; then`
			`safety_status="safe"`
			`safety_code=""`
			`fi`
			`fi`

			`# Output readable info`
			`echo "[$id] $host"`
			`if [[ $safety_status == "safe" ]]; then`
			`echo " ├Description: $display_desc"`
			`echo " └safe"`
			`elif [[ $safety_status == "unsafe" ]]; then`
			`write_desc=$(echo "${description}" \| tr --delete '\n')`
			`echo " ├Description: $display_desc"`
			`echo " ├Code: $safety_code"`
			`echo " └unsafe"`
			`echo "$id,$host,$safety_code,\"$write_desc\"" >> $UNSAFE`
			`else`
			`echo " ├Description: $display_desc"`
			`echo " └unknown: $safety_status"`
			`echo "$id,$host,$safety_status" >> $FAILURES`
			`fi`
			`}`

			`export -f evaluate_instance_safety`

			`# Signal handler to kill child processes and exit the script`
			`trap 'echo EXITING...; killall -HUP parallel; kill $(jobs -p); exit' INT TERM`

			`# Sanitize instance URL to sane URL formatting.`
			`url="$INSTANCE"`
			`while [[ "$url" == */ ]]; do`
			`url="${url%*/}"`
			`done`
			`url="$url/api/federation/instances"`

			`# If we are with a different start offset, we assume we're continuing`
			`# from some previous failed invocation. Otherwise, we clear out the`
			`# output files.`
			`if [ ! -z "$START_OFFSET" ]; then`
			`echo "Starting at $START_OFFSET"`
			`else`
			`echo "" > $UNSAFE`
			`echo "" > $FAILURES`
			`fi`

			`# Calls the Misskey API to get at most <LIMIT> instances at a time`
			`offset="${START_OFFSET:-0}"`
			`while true; do`
			`response=$(curl -s "$url" -X POST -H 'Content-Type: application/json' --data-raw "{\"offset\": $offset,\"limit\":$LIMIT,\"i\":\"$API_KEY\",\"nsfw\":false,\"silenced\":false,\"blocked\":false,\"notResponding\":false}")`

			`if [[ $(echo "$response" \| jq '. \| length') -gt 0 ]]; then`
			`echo "Processing results from offset $offset..."`

			`# Iterate over each instance in the list (filtering out missing descriptions)`
			`instances=$(echo "$response" \| jq -c -r '.[] \| select(.description != null)')`
			`inputs=""`

			`# Builds up an arg list for GNU parallel to execute multiple`
			`# requests against LLM via aichat. Description has HTML`
			`# stripped out by w3m so that GNU parallel's CSV input works`
			`# correctly.`
			`while IFS= read -r instance; do`
			`id=$(echo "$instance" \| jq -r .id)`
			`host=$(echo "$instance" \| jq -r .host)`

			`# remove HMTL, quotes, and various symbols from`
			`# description, trim whitespace, and only add to arg list`
			`# if we actually have something to check.`
			`description=$(echo "$instance" \| jq -r .description \| w3m -dump -T text/html)`
			`description=$(echo "$description" \| \`
			`sed -e 's/^[[:space:]]*//' \| \`
			`sed -e 's/[[:space:]]*$//')`
			`description=$(echo "$description" \| sed 's/<//g' \| sed 's/>//g')`
			`description=$(echo "$description" \| sed 's/^//g' \| sed 's/"//g')`

			`if [ ! -z "$description" ]; then`
			`printf -v inputs '%s"%s","%s","%s"\n' "$inputs" "$id" "$host" "$description"`
			`fi`
			`done <<< "$instances"`

			`# final cleanup and execution of LLM analysis.`
			`inputs=$(echo "$inputs" \| sed '/^[[:blank:]]*$/ d')`
			`parallel -P $MAX_REQUESTS --csv evaluate_instance_safety ::: "${inputs[@]}"`

			`# increment the offset for the next API request`
			`((offset+=LIMIT))`
			`else`
			`echo "No more items to process. The list is empty."`
			`exit 0`
			`fi`
			`done`