#!/bin/bash

##
## TODO
## - don't sleep 1 but wait in flock for 1 second
## - every waiting process should write at least its PID and priority,
##   so that a live PID with a higher priority keeps the precedence (and
##   probably also record the last probing time, and invalidate the entry
##   if it is older than 10s, for example).
## - could add the time each process waited in the waiting list, and its
##   last probe.
## - should execute "$@"; if the user needs '-c' they can run ``bash -c ""``

## Base name of this executable; used as a prefix in usage and log messages.
exname="$(basename "$0")"
## One-line synopsis printed when the command line is invalid.
usage="$exname LOCKLABELS [-k] [FLOCK_OPTIONS] -- [CMD...]"

## Print the arguments on stderr, but only when $verbose is non-empty.
verb() {
    if [ -n "$verbose" ]; then
        >&2 echo "$@"
    fi
}
## Print the arguments on stderr.
err() {
    >&2 echo "$@"
}
## Print the arguments on stderr, then terminate the shell with status 1.
die() {
    >&2 echo "$@"
    exit 1
}
md5_compat() { md5sum | cut -c -32; true; }
## Parse the command line into the global option variables.
##
## Globals written:
##   LOCKLABELS        first positional argument (comma separated labels)
##   flock_opts        remaining positional arguments found before '--'
##   command           every argument following '--'
##   cmd               argument of -c
##   priority          argument of -p/--priority (default 1)
##   remove_duplicate  'true' when -D was given
##   kill              'yes' when -k was given
##   nonblock          'true' when -n/--nb/--nonblock was given
##   timeout           argument of -w/--wait/--timeout
##   errcode           argument of -E/--conflict-exit-code (default 1)
##   verbose           'true' when -v/--verbose was given; otherwise the
##                     value found in the environment is kept
## Exits through 'die' when an option value is missing or is not an
## integer, and when '--' is combined with '-c'.
parse_args() {
    LOCKLABELS=
    flock_opts=()
    command=()
    nonblock=
    errcode=1
    timeout=
    cmd=
    priority=1
    remove_duplicate=
    kill=
    verbose=${verbose:-}
    ## Loop on the argument count rather than on "$1" so that an empty
    ## argument does not silently end option parsing.
    while [ "$#" -gt 0 ]; do
        case "$1" in
            -h|--help)
                ## No dedicated help text exists in this script; fall
                ## back on the usage line.
                echo "${help:-$usage}"
                exit 0
                ;;
            -V|--version)
                echo "${version:-unknown}"
                exit 0
                ;;
            -c)
                [ "$#" -ge 2 ] || die "Option '$1' requires an argument."
                cmd="$2"
                shift
                ;;
            -p|--priority)
                [ "$#" -ge 2 ] || die "Option '$1' requires an argument."
                [[ "$2" =~ ^-?[0-9]+$ ]] ||
                    die "Option '$1' expects an integer (got '$2')."
                priority=$2
                shift
                ;;
            -D)
                remove_duplicate=true
                ;;
            -k)
                kill=yes
                ;;
            -n|--nb|--nonblock)
                nonblock=true
                ;;
            -w|--wait|--timeout)
                [ "$#" -ge 2 ] || die "Option '$1' requires an argument."
                [[ "$2" =~ ^-?[0-9]+$ ]] ||
                    die "Option '$1' expects an integer (got '$2')."
                timeout=$2
                shift
                ;;
            -E|--conflict-exit-code)
                [ "$#" -ge 2 ] || die "Option '$1' requires an argument."
                [[ "$2" =~ ^-?[0-9]+$ ]] ||
                    die "Option '$1' expects an integer (got '$2')."
                errcode=$2
                shift
                ;;
            -v|--verbose)
                verbose=true
                ;;
            --)
                [ "$cmd" ] && die "'--' and '-c' are mutualy exclusive"
                shift
                command+=("$@")
                break
                ;;
            *)
                ## The first non-empty positional is the label list, the
                ## following ones are collected in flock_opts.
                if [ -z "$LOCKLABELS" ]; then
                    LOCKLABELS=$1
                else
                    flock_opts+=("$1")
                fi
                ;;
        esac
        shift
    done
}

parse_args "$@"

## A label list is mandatory: without it there is nothing to lock on.
if [ -z "$LOCKLABELS" ]; then
    err "You must provide a lock file as first argument."
    err "$usage"
    exit 1
fi

## When duplicate removal (-D) is requested, compute a digest of the
## command line; it is stored next to our PID in the pidfile so that other
## instances can recognise an identical pending command.
if [ "$remove_duplicate" ]; then
    md5code=$(
        ## printf instead of echo: a command starting with '-n' or '-e'
        ## would otherwise be swallowed as an echo option.
        if [ "$cmd" ]; then
            printf '%s\n' "bash -c $cmd"
        else
            printf '%s\n' "${command[*]}"
        fi | md5_compat)
fi

## Succeeds when $1 is a decimal integer with an optional leading '-'.
is_int() {
    local candidate="$1" int_regex='^-?[0-9]+$'
    [[ "$candidate" =~ $int_regex ]]
}
## Succeeds when a process with PID $1 currently exists.
##
## Arguments: $1 - PID to probe
## Returns: 0 when 'ps' finds the process, non-zero otherwise (including
##   when $1 is empty).
is_pid_alive() {
    local pid="$1"
    ## An empty PID can never be alive; don't bother asking ps.
    [ -n "$pid" ] || return 1
    ps -p "$pid" >/dev/null 2>&1
}

## Succeeds when at least one running process belongs to process group $1.
##
## Arguments: $1 - process group id
## Returns: 0 when 'ps -e' lists a process with that PGID, non-zero
##   otherwise. An empty or non-numeric PGID is reported as not alive
##   (an empty value would otherwise make the pattern match every line).
is_pgid_alive() {
    local pgid="$1"
    [[ "$pgid" =~ ^[0-9]+$ ]] || return 1
    ps -e -o pgid,pid= | grep -Eq "^ *$pgid "
}

## Print the process group id of the process whose PID is $1.
##
## Arguments: $1 - PID to look up
## Outputs: the PGID on stdout; a diagnostic through 'err' on failure
## Returns: 0 on success, 1 when no integer PGID could be obtained
pgid_from_pid() {
    ## 'pgid' is local so that callers keeping their own 'pgid' variable
    ## are not clobbered when this is called outside a subshell.
    local pid="$1" pgid
    pgid=$(ps -o pgid= -p "$pid" 2>/dev/null | grep -Eo "[0-9]+")
    if ! is_int "$pgid"; then
        err "Could not retrieve a valid PGID from PID '$pid' (returned '$pgid')."
        return 1
    fi
    echo "$pgid"
}

## Kill the process group of PID $1 and wait until that PID is gone.
##
## The group first receives SIGTERM (up to three times, one second apart),
## then SIGKILL (up to twice). The whole procedure is bounded by a 5 second
## timeout.
##
## Arguments: $1 - PID of the process to get rid of
## Returns: 0 when the PID is no longer alive; 1 when the group resisted
##   the kill procedure, when the timeout was reached, or when the PGID
##   could not be found while the PID is still alive.
ensure_kill() {
    local pid="$1" timeout=5 start=$SECONDS kill_count=0 pgid
    if ! pgid=$(pgid_from_pid "$pid"); then
        ## The lookup fails when the process vanished in the meantime,
        ## which is the outcome we are after.
        is_pid_alive "$pid" || return 0
        return 1
    fi
    while is_pid_alive "$pid"; do
        if is_pgid_alive "$pgid"; then
            if [ "$kill_count" -gt 4 ]; then
                err "FATAL: duplicate command, GPID=$pgid has resisted kill procedure. Aborting."
                return 1
            elif [ "$kill_count" -gt 2 ]; then
                err "duplicate command, PGID wouldn't close itself, force kill PGID: kill -9 -- -$pgid"
                ## The leading '-' addresses the whole process group; a
                ## bare "$pgid" would only signal the one process whose
                ## PID happens to equal the PGID.
                kill -9 -- -"$pgid"
            else
                err "duplicate command, Sending SIGTERM to PGID: kill -- -$pgid"
                kill -- -"$pgid"
            fi
            kill_count=$((kill_count + 1))
        fi
        ## Give the signal time to act, and avoid spinning when the PID is
        ## alive while its group is not listed.
        sleep 1
        if [ "$((SECONDS - start))" -gt "$timeout" ]; then
            err "timeout reached. $pid"
            return 1
        fi
    done
    return 0
}

## Try to become the owner of the pidfile associated with label $1.
##
## The pidfile's first line is "<owner pid> <priority>"; with -D, waiting
## instances append "<pid> <md5 of their command>" lines below it. All
## pidfile accesses are serialised through an flock on $lockfile (fd 200).
##
## Arguments: $1 - lock label
## Globals read: pid, priority, kill, nonblock, timeout, errcode,
##   remove_duplicate, md5code, verbose, exname
## Globals written: lockfile, pidfile (pidfile is also exported)
## Returns: 0 once "$pid $priority" was written as owner line; 1 on read
##   error, on conflict in nonblock mode, or when a process resisted the
##   kill procedure; $errcode when $timeout seconds elapsed.
acquire_pid_file() {
    local label=$1
    lockfile="/var/lock/lockcmd-$label.lock"
    mkdir -p /var/run/lockcmd
    pidfile="/var/run/lockcmd/$label.pid"
    export pidfile
    (
        ## Inside this subshell, log helpers are prefixed with label and PID.
        verb() { [ -z "$verbose" ] || echo "$exname($label) $pid> $*" >&2 ; }
        err() { echo "$exname($label) $pid> $*" >&2; }

        start=$SECONDS
        kill_count=0
        pgid_not_alive_count=0
        while true; do
            ## ask for lock on $lockfile (fd 200)
            if ! flock -n -x 200; then
                verb "Couldn't acquire primary lock... (elapsed $((SECONDS - start)))"
            else
                verb "Acquired lock '$label' on pidfile, inspecting pidfile."
                if ! [ -e "$pidfile" ]; then
                    verb "No pidfile, inscribing my PID"
                    printf '%s\n' "$pid $priority" > "$pidfile"
                    exit 0
                fi

                if ! content=$(cat "$pidfile" 2>/dev/null); then
                    err "Can't read $pidfile"
                    exit 1
                fi
                ## Only the first line (the owner line) is read here.
                read -r opid opriority <<< "$content"
                opriority=${opriority:-1}
                verb "Previous PID is $opid, with priority $opriority"
                if ! is_pid_alive "$opid"; then
                    err "Ignoring stale PID $opid"
                    printf '%s\n' "$pid $priority" > "$pidfile"
                    exit 0
                fi

                if [ "$remove_duplicate" ]; then
                    ## Collect the PIDs waiting with the same command, and
                    ## add my pid and md5 if not already there. The owner
                    ## line (first line) is skipped.
                    same_cmd_pids=$(
                        echo "$content" | tail -n +2 |
                            grep -E "^[0-9]+ $md5code$" 2>/dev/null |
                            cut -f 1 -d " ")
                    same_pids=()
                    found_myself=
                    ## Unquoted on purpose: one PID per word.
                    for spid in $same_cmd_pids; do
                        if [ "$spid" == "$pid" ]; then
                            found_myself=true
                            continue
                        fi
                        same_pids+=("$spid")
                    done
                    [ "$found_myself" ] || echo "$pid $md5code" >> "$pidfile"
                fi
                ## release the lock to give a chance to the other process
                ## to remove the pidfile.
                flock -u 200
                if [ "$remove_duplicate" ]; then
                    for spid in "${same_pids[@]}"; do
                        if ! ensure_kill "$spid"; then
                            err "Couldn't kill previous duplicate command."
                            exit 1
                        fi
                    done
                fi
                pgid=$(pgid_from_pid "$opid")
                verb "PGID of previous PID is $pgid"
                if is_pgid_alive "$pgid"; then
                    verb "Previous PGID is still alive"
                    if [ "$kill" ] && [ "$priority" -ge "$opriority" ]; then
                        if [ "$kill_count" -gt 4 ]; then
                            err "$pid>FATAL: GPID=$pgid has resisted kill procedure. Aborting."
                            exit 1
                        elif [ "$kill_count" -gt 2 ]; then
                            err "PGID wouldn't close itself, force kill PGID: kill -9 -- -$pgid"
                            ## Leading '-' targets the process group, as
                            ## announced in the message above.
                            kill -9 -- -"$pgid"
                            sleep 1
                        else
                            err "Sending SIGTERM to PGID: kill -- -$pgid"
                            kill -- -"$pgid"
                            sleep 1
                        fi
                        kill_count=$((kill_count + 1))
                    elif [ "$nonblock" ]; then
                        verb "Nonblock options forces exit."
                        exit 1
                    else
                        verb "Couldn't acquire Lock... (elapsed $((SECONDS - start)))"
                    fi
                else
                    if [ "$pgid_not_alive_count" -gt 4 ]; then
                        verb "$pid>A lock exists for label $label, but PGID:$pgid in it isn't alive while child $pid is ?!?."
                        err "$pid>Can't force seizing the lock."
                        exit 1
                    fi
                    pgid_not_alive_count=$((pgid_not_alive_count + 1))
                fi
            fi

            if [ "$timeout" ] && [ "$timeout" -lt "$((SECONDS - start))" ]; then
                err "Timeout reached (${timeout}s) while waiting for lock on $label"
                exit "$errcode"
            fi
            sleep 1
        done
    ) 200> "$lockfile"
}

## Release what this process holds in the pidfile of label $1.
##
## When the owner line (first line) of the pidfile carries our PID, the
## pidfile is deleted. Otherwise, with -D, our "<pid> <md5>" waiting line
## is removed from it. Accesses are serialised through an flock on
## $lockfile (fd 200), retried every second for at most 5 seconds.
##
## Arguments: $1 - lock label
## Globals read: pid, remove_duplicate, md5code, errcode, verbose, exname
## Globals written: lockfile, pidfile
## Returns: 0 when done; 1 when the pidfile is missing or unreadable;
##   $errcode when the lock could not be obtained in time.
remove_pid_file() {
    local label=$1
    lockfile="/var/lock/lockcmd-$label.lock"
    mkdir -p /var/run/lockcmd
    pidfile="/var/run/lockcmd/$label.pid"

    (
        ## Inside this subshell, log helpers are prefixed with label and PID.
        verb() { [ -z "$verbose" ] || echo "$exname($label) $pid> $*" >&2 ; }
        err() { echo "$exname($label) $pid> $*" >&2; }
        verb "Asking lock to delete $pidfile."
        ## Local to the subshell: cleanup never waits more than 5 seconds,
        ## whatever the user supplied timeout was.
        timeout=5
        start=$SECONDS
        while true; do
            ## ask for lock on $lockfile (fd 200)
            if ! flock -n -x 200; then
                verb "Couldn't acquire primary lock... (elapsed $((SECONDS - start)))"
            else
                verb "Acquired lock '$label' on pidfile."
                if ! [ -e "$pidfile" ]; then
                    verb "No more pidfile, somebody deleted for us ?1?"
                    exit 1
                fi
                if ! content=$(cat "$pidfile" 2>/dev/null); then
                    err "Can't read $pidfile"
                    exit 1
                fi
                ## Only the PID of the owner line matters here.
                read -r opid _ <<< "$content"
                if [ "$opid" == "$pid" ]; then
                    verb "Deleted pidfile. Releasing lock."
                    rm -f "$pidfile"
                else
                    verb "Removing duplicates in pidfile. Releasing lock."
                    if [ "$remove_duplicate" ]; then
                        sed -ri "/^$pid $md5code$/d" "$pidfile"
                    fi
                fi
                exit 0
            fi
            if [ "$timeout" ] && [ "$timeout" -lt "$((SECONDS - start))" ]; then
                err "Timeout reached (${timeout}s) while waiting for lock on $label"
                exit "$errcode"
            fi
            sleep 1
        done
    ) 200> "$lockfile"
}

## Appends a command to the handlers of one or more signals.
##
## example: trap_add EXIT,INT close_ssh "$ip"
##
## Arguments: $1 - comma separated list of signal names
##            $2... - command (words are joined with spaces)
## Returns: 0 on success, 1 on usage error or when 'trap' refuses the
##   signal.
trap_add() {
    local sigs="$1" sig cmd prev_cmd new_cmd
    local squote="'" escaped_squote="'\\''"
    shift || {
        echo "${FUNCNAME[0]} usage error" >&2
        return 1
    }
    cmd="$*"
    while IFS="," read -r -d "," sig; do
        prev_cmd="$(trap -p "$sig")"
        if [ "$prev_cmd" ]; then
            ## 'trap -p' prints:  trap -- '<command>' <SIGNAME>
            ## Strip the prefix, then everything from the closing quote
            ## on: the printed name may differ from the one we were given
            ## (e.g. SIGINT for INT), so it cannot be matched literally.
            new_cmd="${prev_cmd#"trap -- '"}"
            new_cmd="${new_cmd%"'"*}"
            ## Undo the '\'' escaping of embedded single quotes.
            new_cmd="${new_cmd//"$escaped_squote"/$squote}"
        else
            new_cmd=
        fi
        ## An empty previous command (unset or ignored signal) must not
        ## produce a leading ';', which would be a syntax error.
        if [ "$new_cmd" ]; then
            new_cmd="$new_cmd;$cmd"
        else
            new_cmd="$cmd"
        fi
        trap -- "$new_cmd" "$sig" || {
            echo "unable to add command '$cmd' to trap $sig" >&2
            return 1
        }
    done < <(echo "$sigs,")
}

## Run remove_pid_file for every label of the comma separated LOCKLABELS
## list, each in its own background job, and wait for all of them.
##
## Globals read: LOCKLABELS
## A failing removal is reported through 'err' and does not stop the
## other ones.
remove_all_pid_file() {
    local label
    while read -r -d "," label; do
        ## Skip empty entries such as the one in "a,,b".
        [ -n "$label" ] || continue
        {
            remove_pid_file "$label" || err "Could not delete $label"
        } &
    done < <(echo "$LOCKLABELS,")
    wait
}

##
## Code
##

## PID recorded in the pidfiles; it is the PID of this main shell even
## when read from inside subshells.
pid="$$"

## Whatever the exit path, release what we hold in the pidfiles.
trap_add EXIT "remove_all_pid_file"

## Acquire all the labels in parallel, one background job per label.
acquire_jobs=()
while read -r -d "," label; do
    acquire_pid_file "$label" &
    acquire_jobs+=("$!")
done < <(echo "$LOCKLABELS,")

## Wait for each job individually: a bare 'wait' always returns 0, and an
## 'exit' placed in a background list only leaves that background
## subshell, so failures would go unnoticed and the command would run
## without holding the locks.
acquire_failed=
for acquire_job in "${acquire_jobs[@]}"; do
    wait "$acquire_job" || acquire_failed=true
done
[ -z "$acquire_failed" ] || exit "$errcode"

if [ "$cmd" ]; then
    bash -c "$cmd"
else
    "${command[@]}"
fi
## Keep the command's status: '$?' read after the assignment would be the
## status of the assignment itself, i.e. always 0.
errlvl="$?"
exit "$errlvl"