#!/bin/sh

# Prints the command-line help text to stdout.
# Uses $0 so the synopsis shows the name the script was invoked with.
usage() {
	printf 'Usage: %s <command_name> [<options>]\n' "$0"
	# Option descriptions, each indented by one tab.
	printf '\t%s\n' \
		"command_name: Name of the executable, see ps -o comm" \
		"--cpu-limit <pct>: Only processes that are above this limit for their lifetime CPU-Usage" \
		"           (in percent) will be considered for being killed (default: 80)" \
		"--lifetime-limit <seconds>: Only processes that are older than <lifetime_limit> Seconds" \
		"                will be considered for being killed (default: 36000, 10h)" \
		"--kill-sigterm: Kills the resulting processes with a sigterm" \
		"--kill-sigkill: Kills the resulting processes with a sigkill" \
		"--logfile <file>: Appends the log output to <file> (default: /dev/stderr)" \
		"--help: Prints this message"
	printf '%s\n' \
		"Note: Both lifetime_limit AND cpu_limit have to be above the threshold for the process to actually be killed" \
		"Note: Without --kill-sigterm or --kill-sigkill the matching PIDs are only printed (dry run)." \
		"Note: Such processes are often not responsive anymore; use --kill-sigkill if SIGTERM has no effect."
}

# The first positional argument is mandatory and names the executable to match.
# It is inspected before option parsing, so a leading --help has to be handled
# here; otherwise it would silently be taken as the command name.
case "${1:-}" in
	'')
		# Missing command name is a usage error: report on stderr, fail.
		usage >&2
		exit 1
		;;
	--help)
		usage
		exit 0
		;;
esac

# Defaults; each can be overridden by the options parsed further below.
COMMAND_NAME="$1"
CPU_LIMIT=80            # percent
LIFETIME_LIMIT=36000    # seconds (10h)
ACTION="echo"           # dry run: only print the matching PIDs
LOGFILE="/dev/stderr"

# Drop the command name so that only options remain in "$@".
shift 1


# Prints the message in $1 followed by the usage text to stderr, then aborts
# the script with status 1.
option_error() {
	printf '%s\n' "$1" >&2
	usage >&2
	exit 1
}

# Succeeds when $1 is a non-negative decimal number such as 80, 12.5 or .5.
# Anything else (empty, signs, exponents, stray characters) is rejected so the
# limits are always compared numerically by awk later on.
is_number() {
	case "$1" in
		''|.|*[!0-9.]*|*.*.*) return 1;;
	esac
	return 0
}

# Parse the options that follow the command name.
while [ "$#" -gt 0 ]; do
	case "$1" in
		--help)
			usage
			exit
			;;
		--kill-sigterm)
			ACTION="kill"
			shift 1
			;;
		--kill-sigkill)
			ACTION="kill -9"
			shift 1
			;;
		--cpu-limit|--lifetime-limit|--logfile)
			# Without this guard "shift 2" fails when the value is missing:
			# bash then loops forever, dash aborts with a shell error.
			[ "$#" -ge 2 ] || option_error "Option '$1' requires a value"
			case "$1" in
				--cpu-limit)
					is_number "$2" || option_error "Invalid value for --cpu-limit: '$2'"
					CPU_LIMIT="$2"
					;;
				--lifetime-limit)
					is_number "$2" || option_error "Invalid value for --lifetime-limit: '$2'"
					LIFETIME_LIMIT="$2"
					;;
				--logfile)
					[ -n "$2" ] || option_error "Invalid value for --logfile: ''"
					LOGFILE="$2"
					;;
			esac
			shift 2
			;;
		*)
			option_error "Unknown option: '$1'"
			;;
	esac
done

# Prints the current local time in the format used for every log line.
timestamp() {
	date "+%Y-%m-%d %H:%M:%S"
}

# Reads ps rows of the form "<pid> <pcpu> <etimes> <comm>" from stdin and
# prints "<pid> <pcpu> <comm>" for every row whose command name equals
# COMMAND_NAME and whose CPU usage and elapsed time are both above
# CPU_LIMIT and LIFETIME_LIMIT.
# comm is taken as everything after the third column, so names containing
# spaces still match. The name is handed to awk through the environment
# because "awk -v" would interpret backslash escapes in it.
select_candidates() {
	KILL_TARGET_COMM="$COMMAND_NAME" awk -v "cpu_limit=$CPU_LIMIT" -v "lifetime_limit=$LIFETIME_LIMIT" '
		{
			comm = $0
			sub(/^[ \t]*[^ \t]+[ \t]+[^ \t]+[ \t]+[^ \t]+[ \t]+/, "", comm)
			if (comm == ENVIRON["KILL_TARGET_COMM"] && $2 > cpu_limit && $3 > lifetime_limit) {
				print $1, $2, comm
			}
		}'
}

# Log header: timestamp plus the effective settings of this run.
printf "# Running cleanup | %s | %s | %s | %s\n" "$(timestamp)" "$CPU_LIMIT" "$LIFETIME_LIMIT" "$ACTION" >> "$LOGFILE"

# One -o per column with an empty header suppresses the header row; comm is
# requested last so that it is the only column that may contain spaces.
ps -e -o pid= -o pcpu= -o etimes= -o comm= | select_candidates | while read -r pid cpu comm ; do
	# ACTION is deliberately left unquoted so that "kill -9" is split into the
	# command and its flag.
	# shellcheck disable=SC2086
	$ACTION "$pid"
	# Log the values captured before the action ran; after a kill the process
	# is usually gone and a second ps lookup would come back empty.
	printf "%s | %s | %s | %s | %s\n" "$(timestamp)" "$pid" "$comm" "$cpu" "$ACTION" >> "$LOGFILE"
done

printf "# ------------------------------\n" >> "$LOGFILE"
