You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

363 lines
11 KiB

#!/bin/bash
##
## TODO
## - don't sleep 1 but wait in flock for 1 second
## - every waiting proc should write at least their PID and priority,
## to leave alive PID with higher priority the precedence. (and probably
## a check to the last probing time, and invalidate it if it is higher than 10s
## for example.)
## - could add the time they waited in the waiting list, and last probe.
## - should execute "$@", if user needs '-c' it can run ``bash -c ""``
exname="$(basename "$0")"
usage="$exname LOCKLABELS [-k] [FLOCK_OPTIONS] -- [CMD...]"
verb() { [ -z "$verbose" ] || echo "$@" >&2 ; }
err() { echo "$@" >&2; }
die() { echo "$@" >&2; exit 1; }
md5_compat() { md5sum | cut -c -32; true; }
LOCKLABELS=
flock_opts=()
command=()
nonblock=
errcode=1
timeout=
cmd=
priority=1
remove_duplicate=
while [ "$1" ]; do
case "$1" in
-h|--help)
echo "$help"
exit 0
;;
-V|--version)
echo "$version"
exit 0
;;
-c)
cmd="$2"
shift
;;
-p|--priority)
priority=$2
shift
;;
-D)
remove_duplicate=true
;;
-k)
kill=yes
;;
-n|--nb|--nonblock)
nonblock=true
;;
-w|--wait|--timeout)
timeout=$2 ## will manage this
shift
;;
-E|--conflict-exit-code)
errcode=$2 ## will manage this
shift
;;
-v|--verbose)
verbose=true ## will manage this
;;
-n|--nb|--nonblock)
nonblock=true ## will manage this
;;
--)
[ "$cmd" ] && die "'--' and '-c' are mutualy exclusive"
shift
command+=("$@")
break 2
;;
*)
[ -z "$LOCKLABELS" ] && { LOCKLABELS=$1 ; shift ; continue ; }
flock_opts+=("$1")
;;
esac
shift
done
if [ -z "$LOCKLABELS" ]; then
err "You must provide a lock file as first argument."
err "$usage"
exit 1
fi
if [ "$remove_duplicate" ]; then
md5code=$(
if [ "$cmd" ]; then
echo bash -c "$cmd"
else
echo "${command[@]}"
fi | md5_compat)
fi
function is_int () { [[ "$1" =~ ^-?[0-9]+$ ]] ; }
is_pid_alive() {
local pid="$1"
ps --pid "$pid" >/dev/null 2>&1
}
is_pgid_alive() {
local pgid="$1"
[ "$(ps -e -o pgid,pid= | egrep "^ *$pgid ")" ]
}
pgid_from_pid() {
local pid="$1"
pgid=$(ps -o pgid= "$pid" 2>/dev/null | egrep -o "[0-9]+")
if ! is_int "$pgid"; then
err "Could not retrieve a valid PGID from PID '$pid' (returned '$pgid')."
return 1
fi
echo "$pgid"
}
ensure_kill() {
local pid="$1" timeout=5 start=$SECONDS kill_count=0 pgid
pgid=$(pgid_from_pid "$pid")
while is_pid_alive "$pid"; do
if is_pgid_alive "$pgid"; then
if [ "$kill_count" -gt 4 ]; then
err "FATAL: duplicate command, GPID=$pgid has resisted kill procedure. Aborting."
return 1
elif [ "$kill_count" -gt 2 ]; then
err "duplicate command, PGID wouldn't close itself, force kill PGID: kill -9 -- -$pgid"
kill -9 -- "$pgid"
sleep 1
else
err "duplicate command, Sending SIGKILL to PGID: kill -- -$pgid"
kill -- -"$pgid"
sleep 1
fi
((kill_count++))
fi
if [ "$((SECONDS - start))" -gt "$timeout" ]; then
err "timeout reached. $pid"
return 1
fi
done
return 0
}
acquire_pid_file() {
local label=$1
lockfile="/var/lock/lockcmd-$label.lock"
mkdir -p /var/run/lockcmd
pidfile="/var/run/lockcmd/$label.pid"
export pidfile
(
verb() { [ -z "$verbose" ] || echo "$exname($label) $pid> $@" >&2 ; }
err() { echo "$exname($label) $pid> $@" >&2; }
start=$SECONDS
kill_count=0
pgid_not_alive_count=0
while true; do
## ask for lock on $lockfile (fd 200)
if ! flock -n -x 200; then
verb "Couldn't acquire primary lock... (elapsed $((SECONDS - start)))"
else
verb "Acquired lock '$label' on pidfile, inspecting pidfile."
if ! [ -e "$pidfile" ]; then
verb "No pidfile, inscribing my PID"
echo -e "$pid $priority" > "$pidfile"
exit 0
fi
if ! content=$(cat "$pidfile" 2>/dev/null); then
err "Can't read $pidfile"
exit 1
fi
read opid opriority < <(echo "$content" | head -n 1)
opriority=${opriority:-1}
verb "Previous PID is $opid, with priority $opriority"
if ! is_pid_alive "$opid"; then
err "Ignoring stale PID $opid"
echo -e "$pid $priority" > "$pidfile"
exit 0
else
if [ "$remove_duplicate" ]; then ## Add my pid and md5 if not already there.
same_cmd_pids=$(
echo "$content" | tail -n +1 | \
egrep "^[0-9]+ $md5code$" 2>/dev/null | \
cut -f 1 -d " ")
same_pids=()
found_myself=
for spid in $same_cmd_pids; do
if [ "$spid" == "$pid" ]; then
found_myself=true
continue
fi
same_pids+=("$spid")
done
[ "$found_myself" ] || echo "$pid $md5code" >> "$pidfile"
fi
flock -u 200 ## reopen the lock to give a chance to the other process to remove the pidfile.
if [ "$remove_duplicate" ]; then ## Add my pid and md5 if not already there.
for spid in "${same_pids[@]}"; do
if ! ensure_kill "$spid"; then
err "Couldn't kill previous duplicate command."
exit 1
fi
done
fi
pgid=$(pgid_from_pid "$opid")
verb "PGID of previous PID is $pgid"
if is_pgid_alive "$pgid"; then
verb "Previous PGID is still alive"
if [ "$kill" ] && [ "$priority" -ge "$opriority" ]; then
if [ "$kill_count" -gt 4 ]; then
err "$pid>FATAL: GPID=$pgid has resisted kill procedure. Aborting."
exit 1
elif [ "$kill_count" -gt 2 ]; then
err "PGID wouldn't close itself, force kill PGID: kill -9 -- -$pgid" >&2
kill -9 -- "$pgid"
sleep 1
else
err "Sending SIGKILL to PGID: kill -- -$pgid" >&2
kill -- -"$pgid"
sleep 1
fi
((kill_count++))
else
if [ "$nonblock" ]; then
verb "Nonblock options forces exit."
exit 1
else
verb "Couldn't acquire Lock... (elapsed $((SECONDS - start)))"
fi
fi
else
if [ "$pgid_not_alive_count" -gt 4 ]; then
verb "$pid>A lock exists for label $label, but PGID:$pgid in it isn't alive while child $pid is ?!?."
err "$pid>Can't force seizing the lock." >&2
exit 1
fi
((pgid_not_alive_count++))
fi
fi
fi
if [ "$timeout" ] && [ "$timeout" -lt "$((SECONDS - start))" ]; then
err "Timeout reached (${timeout}s) while waiting for lock on $label"
exit "$errcode"
fi
sleep 1
done
) 200> "$lockfile"
}
remove_pid_file() {
local label=$1
lockfile="/var/lock/lockcmd-$label.lock"
mkdir -p /var/run/lockcmd
pidfile="/var/run/lockcmd/$label.pid"
(
verb() { [ -z "$verbose" ] || echo "$exname($label) $pid> $@" >&2 ; }
err() { echo "$exname($label) $pid> $@" >&2; }
verb "Asking lock to delete $pidfile."
timeout=5
start=$SECONDS
while true; do
## ask for lock on $lockfile (fd 200)
if ! flock -n -x 200; then
verb "Couldn't acquire primary lock... (elapsed $((SECONDS - start)))"
else
verb "Acquired lock '$label' on pidfile."
if ! [ -e "$pidfile" ]; then
verb "No more pidfile, somebody deleted for us ?1?"
exit 1
fi
if ! content=$(cat "$pidfile" 2>/dev/null); then
err "Can't read $pidfile"
exit 1
fi
read opid opriority < <(echo "$content" | head -n 1)
opriority=${opriority:-1}
if [ "$opid" == "$pid" ]; then
verb "Deleted pidfile. Releasing lock."
rm -f "$pidfile"
exit 0
else
verb "Removing duplicates in pidfile. Releasing lock."
[ "$remove_duplicate" ] && sed -ri "/^$pid $md5code$/d" "$pidfile"
exit 0
fi
fi
if [ "$timeout" ] && [ "$timeout" -lt "$((SECONDS - start))" ]; then
err "Timeout reached (${timeout}s) while waiting for lock on $label"
exit "$errcode"
fi
sleep 1
done
) 200> "$lockfile"
}
## appends a command to the signal handler functions
#
# example: trap_add EXIT,INT close_ssh "$ip"
trap_add() {
local sigs="$1" sig cmd old_cmd
shift || {
echo "${FUNCNAME} usage error" >&2
return 1
}
cmd="$@"
while IFS="," read -d "," sig; do
prev_cmd="$(trap -p "$sig")"
if [ "$prev_cmd" ]; then
new_cmd="${prev_cmd#trap -- \'}"
new_cmd="${new_cmd%\' "$sig"};$cmd"
else
new_cmd="$cmd"
fi
trap -- "$new_cmd" "$sig" || {
echo "unable to add command '$@' to trap $sig" >&2 ;
return 1
}
done < <(echo "$sigs,")
}
remove_all_pid_file() {
while read -d "," label; do
{
remove_pid_file "$label" || err "Could not delete $label"
} &
done < <(echo "$LOCKLABELS,")
wait
}
##
## Code
##
pid="$$"
trap_add EXIT "remove_all_pid_file"
while read -d "," label; do
acquire_pid_file "$label" || exit "$errcode" &
done < <(echo "$LOCKLABELS,")
wait
if [ "$cmd" ]; then
bash -c "$cmd"
else
"${command[@]}"
fi
errlvl="$?"
exit "$?"