#!/bin/bash
##
## Run a command while holding one or more named locks.
##
## For every label of the comma-separated LOCKLABELS, a pidfile
## /var/run/lockcmd/LABEL.pid is inscribed with "PID PRIORITY" under the
## protection of an flock on /var/lock/lockcmd-LABEL.lock. The command is
## only started once every label has been acquired; the pidfiles are removed
## by an EXIT trap.
##
## Options:
##   -h, --help                       print usage and exit
##   -V, --version                    print version and exit
##   -c CMD                           run CMD through ``bash -c`` (excludes '--')
##   -p, --priority N                 priority written to the pidfile (default 1)
##   -D                               kill previous waiters running the same command
##   -k                               kill the current lock owner's process group
##                                    when our priority is >= its priority
##   -n, --nb, --nonblock             give up at once if the lock is owned
##   -w, --wait, --timeout SECS       give up after SECS seconds
##   -E, --conflict-exit-code CODE    exit code used when the lock is not acquired
##   -v, --verbose                    trace progress on stderr
##   --                               everything after is the command to run
##
## TODO
##   - don't sleep 1 but wait in flock for 1 second
##   - every waiting proc should write at least their PID and priority,
##     to leave alive PID with higher priority the precedence. (and probably
##     a check to the last probing time, and invalidate it if it is higher
##     than 10s for example.)
##   - could add the time they waited in the waiting list, and last probe.
##   - should execute "$@", if user needs '-c' it can run ``bash -c ""``
##

exname="$(basename "$0")"
usage="$exname LOCKLABELS [-k] [FLOCK_OPTIONS] -- [CMD...]"

## Print arguments on stderr, only in verbose mode.
verb() { [ -z "$verbose" ] || echo "$@" >&2; }
## Print arguments on stderr.
err() { echo "$@" >&2; }
## Print arguments on stderr and exit with status 1.
die() { echo "$@" >&2; exit 1; }
## Read stdin, print its md5 hex digest (first 32 chars of md5sum output).
md5_compat() { md5sum | cut -c -32; true; }
## Succeed if $1 is an (optionally negative) integer.
is_int() { [[ "$1" =~ ^-?[0-9]+$ ]]; }

LOCKLABELS=
flock_opts=()   ## collected for compatibility; not used further in this file
command=()
nonblock=
errcode=1
timeout=
cmd=
priority=1
remove_duplicate=
kill=           ## explicitly reset so an inherited env var can't enable -k

## Loop on "$#" (not on "$1") so an empty argument does not silently stop
## the parsing of everything that follows it.
while [ "$#" -gt 0 ]; do
  case "$1" in
    -h|--help)
      ## '$help' is not defined in this file: fall back on the usage line.
      echo "${help:-$usage}"
      exit 0
      ;;
    -V|--version)
      ## NOTE(review): '$version' is not defined in this file.
      echo "$version"
      exit 0
      ;;
    -c)
      cmd="$2"
      shift
      ;;
    -p|--priority)
      priority=$2
      shift
      ;;
    -D)
      remove_duplicate=true
      ;;
    -k)
      kill=yes
      ;;
    -n|--nb|--nonblock)
      nonblock=true
      ;;
    -w|--wait|--timeout)
      timeout=$2
      shift
      ;;
    -E|--conflict-exit-code)
      errcode=$2
      shift
      ;;
    -v|--verbose)
      verbose=true
      ;;
    --)
      [ "$cmd" ] && die "'--' and '-c' are mutualy exclusive"
      shift
      command+=("$@")
      break
      ;;
    *)
      ## First free argument is the lock label list, the others are kept
      ## as flock options.
      if [ -z "$LOCKLABELS" ]; then
        LOCKLABELS=$1
      else
        flock_opts+=("$1")
      fi
      ;;
  esac
  shift
done

if [ -z "$LOCKLABELS" ]; then
  err "You must provide a lock file as first argument."
  err "$usage"
  exit 1
fi

## These values are used in integer tests / as exit status later on.
is_int "$priority" || die "Invalid priority '$priority': integer expected."
is_int "$errcode" || die "Invalid exit code '$errcode': integer expected."
if [ "$timeout" ] && ! is_int "$timeout"; then
  die "Invalid timeout '$timeout': integer expected."
fi

## Fingerprint of the command, used with -D to recognise duplicates.
if [ "$remove_duplicate" ]; then
  md5code=$(
    if [ "$cmd" ]; then
      echo bash -c "$cmd"
    else
      echo "${command[@]}"
    fi | md5_compat)
fi

## Succeed if a process with PID $1 exists.
is_pid_alive() {
  local pid="$1"
  ps --pid "$pid" >/dev/null 2>&1
}

## Succeed if at least one process belongs to process group $1.
## An empty or non-numeric PGID is never considered alive.
is_pgid_alive() {
  local pgid="$1"
  is_int "$pgid" || return 1
  ps -e -o pgid= | grep -Eq "^ *$pgid *\$"
}

## Print the process group id of PID $1; fail with a message if it can't
## be retrieved.
pgid_from_pid() {
  local pid="$1" pgid
  pgid=$(ps -o pgid= -p "$pid" 2>/dev/null | grep -Eo "[0-9]+")
  if ! is_int "$pgid"; then
    err "Could not retrieve a valid PGID from PID '$pid' (returned '$pgid')."
    return 1
  fi
  echo "$pgid"
}

## Kill the process group of PID $1: SIGTERM for the first three attempts,
## then SIGKILL, one attempt per second. Returns 1 if the process is still
## there after the attempts are exhausted or after 5 seconds.
ensure_kill() {
  local pid="$1" timeout=5 start=$SECONDS kill_count=0 pgid
  pgid=$(pgid_from_pid "$pid")
  while is_pid_alive "$pid"; do
    if is_pgid_alive "$pgid"; then
      if [ "$kill_count" -gt 4 ]; then
        err "FATAL: duplicate command, GPID=$pgid has resisted kill procedure. Aborting."
        return 1
      elif [ "$kill_count" -gt 2 ]; then
        err "duplicate command, PGID wouldn't close itself, force kill PGID: kill -9 -- -$pgid"
        ## negative target: signal the whole process group
        kill -9 -- -"$pgid"
      else
        err "duplicate command, Sending SIGTERM to PGID: kill -- -$pgid"
        kill -- -"$pgid"
      fi
      ((kill_count++))
    fi
    if [ "$((SECONDS - start))" -gt "$timeout" ]; then
      err "timeout reached. $pid"
      return 1
    fi
    ## Always pause between probes, also when the group can't be found,
    ## so this loop never spins.
    sleep 1
  done
  return 0
}

## Acquire the lock named $1 for process $pid.
##
## Runs in a subshell holding fd 200 on the label's lock file. Exits 0 once
## "$pid $priority" has been written to the pidfile, 1 on error or nonblock
## conflict, and $errcode when $timeout is exceeded.
acquire_pid_file() {
  local label=$1 lockfile
  lockfile="/var/lock/lockcmd-$label.lock"
  mkdir -p /var/run/lockcmd || return 1
  pidfile="/var/run/lockcmd/$label.pid"
  export pidfile
  (
    verb() { [ -z "$verbose" ] || echo "$exname($label) $pid> $*" >&2; }
    err() { echo "$exname($label) $pid> $*" >&2; }

    start=$SECONDS
    kill_count=0
    pgid_not_alive_count=0
    same_pids=()
    while true; do
      ## ask for lock on $lockfile (fd 200)
      if ! flock -n -x 200; then
        verb "Couldn't acquire primary lock... (elapsed $((SECONDS - start)))"
      else
        verb "Acquired lock '$label' on pidfile, inspecting pidfile."
        if ! [ -e "$pidfile" ]; then
          verb "No pidfile, inscribing my PID"
          printf '%s %s\n' "$pid" "$priority" > "$pidfile"
          exit 0
        fi
        if ! content=$(cat "$pidfile" 2>/dev/null); then
          err "Can't read $pidfile"
          exit 1
        fi
        ## First line of the pidfile is "OWNER_PID OWNER_PRIORITY".
        read -r opid opriority < <(echo "$content" | head -n 1)
        is_int "$opriority" || opriority=1
        verb "Previous PID is $opid, with priority $opriority"
        if ! is_pid_alive "$opid"; then
          err "Ignoring stale PID $opid"
          printf '%s %s\n' "$pid" "$priority" > "$pidfile"
          exit 0
        fi

        if [ "$remove_duplicate" ]; then
          ## Add my pid and md5 if not already there, and collect the other
          ## waiters registered with the same command fingerprint.
          same_cmd_pids=$(
            echo "$content" |
              grep -E "^[0-9]+ $md5code$" 2>/dev/null |
              cut -f 1 -d " ")
          same_pids=()
          found_myself=
          for spid in $same_cmd_pids; do
            if [ "$spid" == "$pid" ]; then
              found_myself=true
              continue
            fi
            same_pids+=("$spid")
          done
          [ "$found_myself" ] || echo "$pid $md5code" >> "$pidfile"
        fi

        ## release the lock to give a chance to the other process to
        ## remove the pidfile.
        flock -u 200

        if [ "$remove_duplicate" ]; then
          for spid in "${same_pids[@]}"; do
            if ! ensure_kill "$spid"; then
              err "Couldn't kill previous duplicate command."
              exit 1
            fi
          done
        fi

        pgid=$(pgid_from_pid "$opid")
        verb "PGID of previous PID is $pgid"
        if is_pgid_alive "$pgid"; then
          verb "Previous PGID is still alive"
          if [ "$kill" ] && [ "$priority" -ge "$opriority" ]; then
            if [ "$kill_count" -gt 4 ]; then
              err "$pid>FATAL: GPID=$pgid has resisted kill procedure. Aborting."
              exit 1
            elif [ "$kill_count" -gt 2 ]; then
              err "PGID wouldn't close itself, force kill PGID: kill -9 -- -$pgid"
              ## negative target: signal the whole process group
              kill -9 -- -"$pgid"
              sleep 1
            else
              err "Sending SIGTERM to PGID: kill -- -$pgid"
              kill -- -"$pgid"
              sleep 1
            fi
            ((kill_count++))
          elif [ "$nonblock" ]; then
            verb "Nonblock options forces exit."
            exit 1
          else
            verb "Couldn't acquire Lock... (elapsed $((SECONDS - start)))"
          fi
        else
          if [ "$pgid_not_alive_count" -gt 4 ]; then
            verb "$pid>A lock exists for label $label, but PGID:$pgid in it isn't alive while child $pid is ?!?."
            err "$pid>Can't force seizing the lock."
            exit 1
          fi
          ((pgid_not_alive_count++))
        fi
      fi

      if [ "$timeout" ] && [ "$timeout" -lt "$((SECONDS - start))" ]; then
        err "Timeout reached (${timeout}s) while waiting for lock on $label"
        exit "$errcode"
      fi
      sleep 1
    done
  ) 200> "$lockfile"
}

## Release the lock named $1 for process $pid.
##
## If $pid owns the pidfile (first line), the pidfile is deleted; otherwise
## only our "$pid $md5code" waiter line is removed (with -D). Exits 1 if the
## pidfile is missing or unreadable, $errcode after 5 seconds without
## getting the flock.
remove_pid_file() {
  local label=$1 lockfile pidfile
  lockfile="/var/lock/lockcmd-$label.lock"
  mkdir -p /var/run/lockcmd || return 1
  pidfile="/var/run/lockcmd/$label.pid"
  (
    verb() { [ -z "$verbose" ] || echo "$exname($label) $pid> $*" >&2; }
    err() { echo "$exname($label) $pid> $*" >&2; }

    verb "Asking lock to delete $pidfile."
    timeout=5
    start=$SECONDS
    while true; do
      ## ask for lock on $lockfile (fd 200)
      if ! flock -n -x 200; then
        verb "Couldn't acquire primary lock... (elapsed $((SECONDS - start)))"
      else
        verb "Acquired lock '$label' on pidfile."
        if ! [ -e "$pidfile" ]; then
          verb "No more pidfile, somebody deleted for us ?1?"
          exit 1
        fi
        if ! content=$(cat "$pidfile" 2>/dev/null); then
          err "Can't read $pidfile"
          exit 1
        fi
        read -r opid _ < <(echo "$content" | head -n 1)
        if [ "$opid" == "$pid" ]; then
          verb "Deleted pidfile. Releasing lock."
          rm -f "$pidfile"
          exit 0
        fi
        verb "Removing duplicates in pidfile. Releasing lock."
        [ "$remove_duplicate" ] && sed -ri "/^$pid $md5code$/d" "$pidfile"
        exit 0
      fi
      if [ "$timeout" ] && [ "$timeout" -lt "$((SECONDS - start))" ]; then
        err "Timeout reached (${timeout}s) while waiting for lock on $label"
        exit "$errcode"
      fi
      sleep 1
    done
  ) 200> "$lockfile"
}

## appends a command to the signal handler functions
#
# example: trap_add EXIT,INT close_ssh "$ip"
#
# Limitation: single quotes inside an already installed handler are kept
# in the escaped form printed by ``trap -p``.
trap_add() {
  local sigs="$1" sig cmd prev_cmd new_cmd
  shift || {
    echo "${FUNCNAME[0]} usage error" >&2
    return 1
  }
  cmd="$*"
  while IFS="," read -r -d "," sig; do
    prev_cmd="$(trap -p "$sig")"
    if [ "$prev_cmd" ]; then
      ## ``trap -p`` prints: trap -- 'COMMAND' SIGNAME
      ## Strip the prefix, then everything from the closing quote on, so
      ## it works whatever spelling of the signal name is printed.
      new_cmd=${prev_cmd#trap -- \'}
      new_cmd=${new_cmd%\' *}
      new_cmd="$new_cmd;$cmd"
    else
      new_cmd="$cmd"
    fi
    trap -- "$new_cmd" "$sig" || {
      echo "unable to add command '$*' to trap $sig" >&2
      return 1
    }
  done < <(echo "$sigs,")
}

## Release every label of $LOCKLABELS in parallel and wait for completion.
remove_all_pid_file() {
  local label
  while read -r -d "," label; do
    {
      remove_pid_file "$label" || err "Could not delete $label"
    } &
  done < <(echo "$LOCKLABELS,")
  wait
}

##
## Code
##

pid="$$"

trap_add EXIT "remove_all_pid_file"

## Acquire all labels in parallel, remembering each job so that its exit
## status can be checked: a bare ``wait`` would always report success.
acquire_pids=()
while read -r -d "," label; do
  acquire_pid_file "$label" &
  acquire_pids+=("$!")
done < <(echo "$LOCKLABELS,")

acquired=true
for acquire_pid in "${acquire_pids[@]}"; do
  wait "$acquire_pid" || acquired=
done
## Never run the command without holding every requested lock.
[ "$acquired" ] || exit "$errcode"

if [ "$cmd" ]; then
  bash -c "$cmd"
else
  "${command[@]}"
fi
errlvl="$?"
## Propagate the command's status ("$?" here would be the assignment's).
exit "$errlvl"