#!/bin/bash
#
# Misskey Safety Scan
#
# Scans the descriptions of all federated instances using an LLM to
# analyze the text to determine if the instance is likely to host
# material considered inappropriate by most people.
#
# This doesn't mean the content on other instances is illegal or
# morally wrong, but it does mean that the instance is a good
# candidate for manual analysis by the local instance administrator.
#
# Usage Instructions:
#
# 1. Find your API key (the "i" parameter) from an API request in
#    browser console.
# 2. Execute script like
#    "scan-federated-instances <instance-url> <api-key> <llm-model>"
# 3. Wait a really long time.
#
# Environment:
# - START_OFFSET: optional. When set, paging starts at this offset and
#   the existing output files are appended to instead of being cleared
#   (use it to resume after a failed run).
#
# Output:
# - scan-output.csv: Instances with descriptions considered
#   inappropriate (columns: id, host, safety code, description).
# - failures.csv: Instances whose result could not be determined
#   (columns: id, host, raw LLM output).
#
# What to do with output:
#
# 1. Run scan-output.csv through the verify-scan script. This will
#    double check descriptions.
#
# 2. Take appropriate action against instances that you feel promote
#    content that is against your instance's terms of service, or are
#    illegal.
#
# 3. Check failures.csv manually for failure reasons.

set -u

###### MAIN CONFIG ######
# Number of instances requested from the API per page.
LIMIT=100

# User information (exported because the worker functions run in
# shells spawned by GNU parallel).
export INSTANCE="${1:-}"
export API_KEY="${2:-}"
export LLM_MODEL="${3:-}"
#########################

#########################
export UNSAFE="scan-output.csv"
export FAILURES="failures.csv"
#########################

#### LLM INFORMATION ####
# Number of instances evaluated concurrently by GNU parallel.
export MAX_REQUESTS=4

# `read -d ''` returns non-zero at end of input; that is expected here.
read -r -d '' DESCRIPTION_PROMPT <<'EOF'
Here is the description of a Mastodon instance. Evaluate if it is safe or not:
EOF
# Must be exported: evaluate_instance_safety runs in a child shell
# started by parallel and would otherwise see an empty prompt prefix.
export DESCRIPTION_PROMPT
#########################

#######################################
# Print an error message to stderr and abort the script.
# Arguments: message words
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Run inference up to three times to account for variance in LLM
# responses, and combine the answers.
# Globals:   LLM_MODEL (read)
# Arguments: $1 - full prompt text sent to aichat on stdin
# Outputs:   "safe" if any response's first line is "safe";
#            "unsafe" plus the second line of the first response if all
#            three responses say "unsafe";
#            otherwise all three raw responses joined on one line
#            (no trailing newline).
#######################################
get_llm_response() {
  local prompt="$1"
  local -a outputs=()
  local output status
  local unsafe_count=0

  for _ in 1 2 3; do
    # Only the first two lines matter: status, then safety code.
    output=$(printf '%s\n' "$prompt" | aichat -S -m "$LLM_MODEL" | head -n 2)
    status=$(head -n 1 <<< "$output")

    # If any response is safe, it's likely the description is actually
    # safe, so further inference cannot change the verdict.
    if [[ "$status" == "safe" ]]; then
      echo "safe"
      return 0
    fi
    if [[ "$status" == "unsafe" ]]; then
      unsafe_count=$((unsafe_count + 1))
    fi
    outputs+=("$output")
  done

  if (( unsafe_count == 3 )); then
    # All responses unsafe: definitely unsafe. Report the code from the
    # first response.
    printf 'unsafe\n%s\n' "$(tail -n 1 <<< "${outputs[0]}")"
  else
    # Unclear response: return everything the model said, flattened.
    printf '%s. %s. %s.' "${outputs[@]}" | tr -d '\n'
  fi
}
export -f get_llm_response

#######################################
# Evaluate one instance and record the verdict.
# Globals:   DESCRIPTION_PROMPT, UNSAFE, FAILURES (read)
# Arguments: $1 - instance id
#            $2 - instance host name
#            $3 - plain-text instance description
# Outputs:   human-readable summary on stdout; appends a row to $UNSAFE
#            for unsafe instances and to $FAILURES for unclear results.
# Returns:   0 (also when the instance is skipped)
#######################################
evaluate_instance_safety() {
  local id="${1:-}"
  local host="${2:-}"
  local description="${3:-}"
  local display_desc write_desc prompt llm_output safety_code safety_status

  # For some reason, we can wind up with empty ids and hosts. Do
  # nothing with those.
  if [[ -z "$id" || -z "$host" ]]; then
    return 0
  fi

  # Short single-line preview for console output.
  display_desc="${description:0:80}"
  display_desc="${display_desc//$'\n'/}"

  # If host does not respond, ignore it.
  if ! curl --max-time 3 -L -s -o /dev/null -X OPTIONS "https://$host"; then
    echo "[$id] $host"
    echo " ├Description: $display_desc"
    echo " └not responding"
    return 0
  fi

  # Call aichat with the description and capture the output.
  prompt="${DESCRIPTION_PROMPT:-} $description"
  llm_output=$(get_llm_response "$prompt")
  safety_code=$(tail -n 1 <<< "$llm_output")
  safety_status=$(head -n 1 <<< "$llm_output")

  # We can ignore certain unsafe codes, because they're not related
  # to inappropriate content. S6 = specialized advice, S7 = privacy,
  # S8 = intellectual property
  if [[ "$safety_status" == "unsafe" ]]; then
    case "$safety_code" in
      S6 | S7 | S8)
        safety_status="safe"
        safety_code=""
        ;;
    esac
  fi

  # Output readable info
  echo "[$id] $host"
  echo " ├Description: $display_desc"
  case "$safety_status" in
    safe)
      echo " └safe"
      ;;
    unsafe)
      write_desc="${description//$'\n'/}"
      echo " ├Code: $safety_code"
      echo " └unsafe"
      # Double any embedded quotes so the quoted CSV field stays valid.
      printf '%s,%s,%s,"%s"\n' "$id" "$host" "$safety_code" \
        "${write_desc//\"/\"\"}" >> "$UNSAFE"
      ;;
    *)
      echo " └unknown: $safety_status"
      # The raw LLM text may contain commas, so it is written quoted.
      printf '%s,%s,"%s"\n' "$id" "$host" \
        "${safety_status//\"/\"\"}" >> "$FAILURES"
      ;;
  esac
}
export -f evaluate_instance_safety

#######################################
# Signal handler: stop the parallel workers started by this script,
# kill any background jobs, and exit.
#######################################
cleanup_on_signal() {
  local pids
  echo "EXITING..."
  # Only signal parallel processes that are children of this script,
  # not every parallel process the user happens to be running.
  pkill -HUP -P "$$" parallel 2> /dev/null
  pids=$(jobs -p)
  if [[ -n "$pids" ]]; then
    # shellcheck disable=SC2086 # word-splitting the PID list is intended
    kill $pids 2> /dev/null
  fi
  exit 130
}
trap cleanup_on_signal INT TERM

# Validate invocation before touching any output file.
if [[ -z "$INSTANCE" || -z "$API_KEY" || -z "$LLM_MODEL" ]]; then
  printf 'Usage: %s <instance-url> <api-key> <llm-model>\n' "${0##*/}" >&2
  exit 2
fi

for tool in curl jq w3m parallel aichat; do
  command -v "$tool" > /dev/null || die "required tool not found: $tool"
done

# Sanitize instance URL to sane URL formatting (drop trailing slashes).
url="$INSTANCE"
while [[ "$url" == */ ]]; do
  url="${url%/}"
done
url="$url/api/federation/instances"

# If we are given a start offset, we assume we're continuing from some
# previous failed invocation. Otherwise, we clear out the output files.
offset="${START_OFFSET:-0}"
[[ "$offset" =~ ^[0-9]+$ ]] \
  || die "START_OFFSET must be a non-negative integer, got: $offset"

if [[ -n "${START_OFFSET:-}" ]]; then
  echo "Starting at $START_OFFSET"
else
  # Truncate without writing a stray blank first line.
  : > "$UNSAFE"
  : > "$FAILURES"
fi

# Calls the Misskey API to get at most LIMIT instances at a time.
while true; do
  # Build the request body with jq so the API key is JSON-escaped.
  request_body=$(jq -n -c \
    --argjson offset "$offset" \
    --argjson limit "$LIMIT" \
    --arg i "$API_KEY" \
    '{offset: $offset, limit: $limit, i: $i, nsfw: false,
      silenced: false, blocked: false, notResponding: false}') \
    || die "could not build request body"

  response=$(curl -s "$url" -X POST \
    -H 'Content-Type: application/json' \
    --data-raw "$request_body") \
    || die "request to $url failed at offset $offset (resume with START_OFFSET=$offset)"

  # Anything other than a JSON array (e.g. an API error object) must
  # not be mistaken for a page of instances or for the end of the list.
  count=$(jq -r 'if type == "array" then length else empty end' \
    <<< "$response" 2> /dev/null)
  [[ -n "$count" ]] \
    || die "unexpected API response at offset $offset: ${response:0:200}"

  if (( count == 0 )); then
    echo "No more items to process. The list is empty."
    exit 0
  fi

  echo "Processing results from offset $offset..."

  # Iterate over each instance in the list (filtering out missing
  # descriptions).
  instances=$(jq -c -r '.[] | select(.description != null)' <<< "$response")
  inputs=""

  # Builds up an arg list for GNU parallel to execute multiple
  # requests against LLM via aichat. Description has HTML stripped out
  # by w3m so that GNU parallel's CSV input works correctly.
  while IFS= read -r instance; do
    [[ -n "$instance" ]] || continue
    id=$(jq -r '.id' <<< "$instance")
    host=$(jq -r '.host' <<< "$instance")

    # Remove HTML and double quotes from the description, trim
    # whitespace, and only add to the arg list if we actually have
    # something to check.
    # NOTE(review): this copy of the script had two further sed
    # substitutions whose patterns were empty: 's///g' (a sed error that
    # blanked every description) and 's/^//g' (a no-op). They are
    # omitted; restore the intended symbol filter if it is known.
    description=$(jq -r '.description' <<< "$instance" \
      | w3m -dump -T text/html \
      | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' -e 's/"//g')

    if [[ -n "$description" ]]; then
      printf -v inputs '%s"%s","%s","%s"\n' \
        "$inputs" "$id" "$host" "$description"
    fi
  done <<< "$instances"

  # Final cleanup (drop blank lines) and execution of LLM analysis.
  inputs=$(sed '/^[[:blank:]]*$/d' <<< "$inputs")
  if [[ -n "$inputs" ]]; then
    parallel -P "$MAX_REQUESTS" --csv evaluate_instance_safety ::: "$inputs"
  fi

  # Increment the offset for the next API request.
  offset=$((offset + LIMIT))
done