586 lines
18 KiB

#!/bin/bash
##
## Here's an example crontab:
##
## SHELL=/bin/sh
## PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
##
## 49 */2 * * * root mirror-dir backup -d core-05.0k.io:10023 -u rsync /etc /home /opt/apps 2>&1 | logger -t mirror-dir
##
#:-
. /etc/shlib
#:-
include common
include parse
include process
include cmdline
include array
depends shyaml lock
##
## Functions
##
## Log file that ``log_tee``/``log_file`` append to and that
## ``mirror-dir:report`` parses.
MIRROR_DIR_LOG=/var/log/mirror-dir.log
## Maximum number of trailing log lines read by ``mirror-dir:report``.
MIRROR_DIR_REPORT_MAX_READ_LINE=1000000
## Extended regex matching a log line timestamp such as
## ``2020-01-31 23:59:59+0100``.
R_DATE='[0-9]{4,4}-[01][0-9]-[0-3][0-9] [012][0-9]:[0-5][0-9]:[0-5][0-9][+-][01][0-9][0-5][0-9]'
##
## Print a one-line summary of each rsync session found in the log.
##
## Reads the last $MIRROR_DIR_REPORT_MAX_READ_LINE lines of
## $MIRROR_DIR_LOG, joins every ``Starting rsync:`` line with the line
## that follows it among the kept ``Starting``/``sent`` lines, and
## prints for each complete pair: start date, destination host, source
## directory, bytes sent, bytes received, rate and duration.
##
## Durations are rendered by ``print_duration``; sizes by ``numfmt``.
##
mirror-dir:report() {
    local s1 s2 d1 d2 host source sent received rate
    local start start_ts end_ts duration
    ## -r: keep backslashes found in host or path names untouched
    while read -r s1 s2 d1 d2 host source sent received rate; do
        start=$(date -d"$s1 $s2" --rfc-3339=seconds)
        start_ts=$(date -d"$s1 $s2" +%s)
        end_ts=$(date -d"$d1 $d2" +%s)
        duration=$((end_ts - start_ts))
        printf "%s %-15s %-30s | %s %s %s %10s\n" \
            "$start" "$host" "$source" "$sent" "$received" "$rate" \
            "$(print_duration "$duration")"
    done < <(
        ## ``grep -E`` replaces ``egrep`` which is obsolescent and makes
        ## recent GNU grep print a warning on each call.
        tail -n "$MIRROR_DIR_REPORT_MAX_READ_LINE" "$MIRROR_DIR_LOG" |
            grep -E "^${R_DATE} (Starting|sent)" |
            ## drop the trailing ``(date)``, turn ``N bytes`` figures
            ## into ``label:N`` and remove thousands separators
            sed -r 's/\s*\(.*\)$//g
                    s/ (([0-9]{1,3},)*[0-9]{1,3})(\.[0-9]{2,2})? bytes(\/sec)?/:\1/g
                    s/,//g
                    s/ :([0-9]+)$/ rate:\1/g' |
            grep -v "^--$" |
            ## append the line following each ``Starting`` line to it
            sed -r "/Starting/N; {s/\n(${R_DATE} )(.*)sent/ \1 sent/g}" |
            ## reorder to: START END HOST SOURCE SENT RECEIVED RATE
            sed -r "s/^(${R_DATE} )Starting rsync: ([^ ]+) -> ([^ ]+) (${R_DATE} )/\1\4\3 \2/g
                    s/ +/ /g
                    s/ [a-z]+:/ /g" |
            ## only keep fully formed records
            grep -E "^${R_DATE} ${R_DATE} [^ ]+ /[^ ]+ [0-9]+ [0-9]+ [0-9]+$"
    ) |
        numfmt --field=6,7 --to=iec-i --suffix=B --padding=8 |
        numfmt --field=8 --to=iec-i --suffix=B/s --padding=10 |
        ## the ``|`` only served as a placeholder field for numfmt
        sed -r 's/ \| / /g'
}
##
## Mirror each source directory to each destination using rsync.
##
## Usage: mirror-dir:run HOSTNAME DESTS DIR...
##
##   HOSTNAME  Store name: data is written under /var/mirror/HOSTNAME
##             on every destination.
##   DESTS     Whitespace separated list of destinations, each one
##             formatted as ``HOST[:PORT][/BWLIMIT]``.
##   DIR...    Local directories to mirror.
##
## Reads the globals $user, $exname, $RSYNC_OPTIONS and $SSH_OPTIONS.
## Each rsync is run through ``lock`` and attempted up to 3 times; the
## outcome is appended to $state_dir/SESSION_ID-success or
## $state_dir/SESSION_ID-fail, SESSION_ID being derived from
## ``HOST[:PORT]`` and the source directory.
##
mirror-dir:run() {
    local hostname="$1" dests="$2" source_dirs
    local dest_path rsync_options ssh_options
    local dest dest_for_session dest_rsync_options dest_ssh_options
    local d dir dirpath session_id lock_label current_rsync_options
    local tmp_exclude_patterns has_exclude_pattern exclude_dir
    local cmd start retry errlvl errlvls interrupted
    shift 2
    dests=($dests)  ## individual dests can't use any space-like separators
    source_dirs=("$@")
    dest_path=/var/mirror/$hostname
    ## left global on purpose: ``get_ids`` reads it
    state_dir=/var/run/mirror-dir
    mkdir -p "$state_dir"
    rsync_options=(
        ${RSYNC_OPTIONS:-} --stats --out-format='%i %o %f %l %b')
    ssh_options=(${SSH_OPTIONS:--o StrictHostKeyChecking=no})
    for dest in "${dests[@]}"; do
        dest_rsync_options=("${rsync_options[@]}")
        if [[ "$dest" == *"/"* ]]; then
            dest_rsync_options+=("--bwlimit" "${dest##*/}")
            dest="${dest%/*}"
        fi
        ## session ids are computed on ``HOST[:PORT]``
        dest_for_session="$dest"
        ## The port only applies to this destination: work on a copy of
        ## the common ssh options so that it can't leak to the next ones.
        dest_ssh_options=("${ssh_options[@]}")
        if [[ "$dest" == *":"* ]]; then
            dest_ssh_options+=("-p" "${dest#*:}")
            dest="${dest%%:*}"
        fi
        for d in "${source_dirs[@]}"; do
            current_rsync_options=("${dest_rsync_options[@]}")
            session_id="$(echo "${dest_for_session}$d" | md5_compat)"
            session_id="${session_id:1:8}"
            dirpath="$(dirname "$d")"
            if [ "$dirpath" == "/" ]; then
                dir="/$(basename "$d")"
            else
                dir="$dirpath/$(basename "$d")"
            fi
            [ -d "$dir" ] || {
                warn "ignoring '$dir' as it is not existing."
                continue
            }
            lock_label=$exname-$hostname-${session_id}
            ## unpredictable name: this runs as root and writes in /tmp
            tmp_exclude_patterns=$(
                mktemp "/tmp/${lock_label}.exclude_patterns.XXXXXX") || {
                warn "ignoring '$dir' as no temporary file could be created."
                continue
            }
            ## Adding the base of the dir if required... seems necessary with
            ## the rsync option that replicate the full path.
            has_exclude_pattern=
            while read-0 exclude_dir; do
                if [ -z "$has_exclude_pattern" ]; then
                    echo "Adding exclude patterns for source '$dir':" >&2
                    has_exclude_pattern=1
                fi
                if [[ "$exclude_dir" == "/"* ]]; then
                    exclude_dir="$dir${exclude_dir}"
                fi
                echo " - $exclude_dir" >&2
                p0 "$exclude_dir"
            done < <(get_exclude_patterns "$dir") > "$tmp_exclude_patterns"
            if [ -n "$has_exclude_pattern" ]; then
                ## -0: the patterns in the file are NUL separated
                current_rsync_options+=("-0" "--exclude-from"="$tmp_exclude_patterns")
            else
                echo "No exclude patterns for '$dir'."
            fi
            echo ---------------------------------
            ## ``mirror-dir:report`` parses this line: keep its format
            echo "Starting rsync: $d -> $dest ($(date))"
            cmd=(
                nice -n 15
                rsync "${current_rsync_options[@]}" -azvARH
                -e "sudo -u $user ssh ${dest_ssh_options[*]}"
                --delete --delete-excluded
                --partial --partial-dir .rsync-partial
                --numeric-ids "$dir/" "$user@$dest":"$dest_path"
            )
            echo "${cmd[@]}"
            start="$SECONDS"
            retry=1
            errlvls=()
            interrupted=
            while true; do
                lock "$lock_label" -v -D -k -- "${cmd[@]}"
                errlvl="$?"
                case "$errlvl" in
                    20) ## Received SIGUSR1, SIGINT
                        echo "!! Rsync received SIGUSR1 or SIGINT."
                        echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-fail" \
                            "$dest $d $((SECONDS - start)) signal SIGUSR1, SIGINT or SIGHUP"
                        interrupted=1
                        break
                        ;;
                    137|143) ## killed SIGKILL, SIGTERM
                        echo "!! Rsync received $(kill -l "$errlvl")"
                        echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-fail" \
                            "$dest $d $((SECONDS - start)) signal: $(kill -l "$errlvl")"
                        interrupted=1
                        break
                        ;;
                    0)
                        echo "Rsync finished with success $d -> $dest in $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-success" \
                            "$dest $d $((SECONDS - start)) OK"
                        break
                        ;;
                    *)
                        errlvls+=("$errlvl")
                        echo "!! Rsync failed with an errorlevel $errlvl after $((SECONDS - start))s since start."
                        if [ "$retry" -lt 3 ]; then
                            echo "!! Triggering a retry ($((++retry))/3)"
                            continue
                        else
                            echo "!! Tried 3 times, bailing out."
                            echo " .. interruption of $d -> $dest after $((SECONDS - start))s"
                            append_trim "${state_dir}/${session_id}-fail" \
                                "$dest $d $((SECONDS - start))" \
                                "Failed after 3 retries (errorlevels: ${errlvls[*]})"
                            break
                        fi
                        ;;
                esac
            done
            ## The temporary file is created in any case: always remove
            ## it, whatever the outcome of the transfer.
            if [ -n "$has_exclude_pattern" ]; then
                rm -fv "$tmp_exclude_patterns"
            else
                rm -f "$tmp_exclude_patterns"
            fi
            ## NOTE(review): as before, an interruption skips the
            ## remaining directories of the current destination only and
            ## the next destination is still processed, although the
            ## message says "Full interruption" -- confirm the intent.
            [ -n "$interrupted" ] && break
        done
    done
}
##
## Output the NUL-separated exclude patterns configured for a directory.
##
## Usage: get_exclude_patterns DIR
##
## Queries the ``DIR.exclude`` list of $config_file through shyaml.
## Outputs nothing and returns non-zero when $config_file is missing;
## shyaml error messages are discarded.
##
get_exclude_patterns() {
    local dir="$1"
    [ -e "$config_file" ] || return
    ## shyaml splits key paths on dots: escape the dots of the directory
    ## name so that the whole path is looked up as one single key.
    shyaml get-values-0 "${dir//./\\.}.exclude" < "$config_file" 2>/dev/null
}
##
## Append a dated line to a file, then keep only its last 5000 lines.
##
## Usage: append_trim FILE WORD...
##
## The line is made of the current date (RFC 3339, seconds precision)
## followed by the remaining arguments. Returns the status of the
## first step that fails.
##
append_trim() {
    local target="$1" stamp
    shift
    stamp="$(date --rfc-3339=s)"
    e "$stamp $*"$'\n' >> "$target" || return
    ## trim through a sibling file, then move it over the original
    tail -n 5000 "$target" > "$target.tmp" || return
    mv "$target.tmp" "$target"
}
## Log facility: copy stdin to stdout and append it to $MIRROR_DIR_LOG.
log_tee() {
    tee -a "$MIRROR_DIR_LOG"
}
## Log facility: append stdin to $MIRROR_DIR_LOG without echoing it.
log_file() {
    cat >> "$MIRROR_DIR_LOG"
}
##
## List the distinct session ids having a state file in $state_dir.
##
## State files are named SESSION_ID-fail or SESSION_ID-success; each
## session id is printed once, one per line. Prints nothing when there
## is no state file.
##
get_ids() {
    local file session_id
    local -A id_done=()
    for file in "$state_dir"/*{-fail,-success}; do
        ## bash leaves an unmatched glob as-is: don't report it as an id
        [ -e "$file" ] || continue
        session_id=${file%-*}
        [ -n "${id_done["$session_id"]}" ] && continue
        id_done["$session_id"]=1
        echo "${session_id##*/}"
    done
}
##
## Output the source directories to mirror through
## ``array_values_to_stdin``.
##
## Usage: mirror-dir:_get_sources [DIR...]
##
## The given directories are used when there are any; otherwise the
## ``default.sources`` values of the config file are used, provided the
## file exists. Sets the global $config_file as a side effect.
##
mirror-dir:_get_sources() {
    local DIR
    DIR=("$@")
    config_file="/etc/$exname/config.yml"
    if [[ "$#" -eq 0 && -e "$config_file" ]]; then
        info "No source provided on command line," \
            "reading '$config_file' for default sources"
        ## ``eval echo`` lets the shell expand the configured values;
        ## the config file content must therefore be trusted.
        DIR=($(eval echo $(shyaml get-values default.sources < "$config_file")))
    fi
    array_values_to_stdin DIR
}
## SOURCED is set to ``true`` when this file is sourced instead of
## being executed directly.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] && SOURCED=true
## Program metadata.
## NOTE(review): presumably read by the cmdline library to build the
## version and help outputs -- confirm.
version=0.1
desc='Manage mirroring of local directory to distant hosts'
help=""
##
## Code
##
## ``cmdline.spec.gnu`` comes from the included ``cmdline`` library.
## NOTE(review): presumably declares the standard GNU-style options for
## the program itself, then for the ``backup`` command -- confirm.
cmdline.spec.gnu
cmdline.spec.gnu backup
## ``-d``/``--dest`` handler of the ``backup`` command: store the given
## value as one more destination in the global ``dests`` array.
cmdline.spec:backup:valued:-d,--dest:run() {
    local value="$1"
    dests+=("$value")
}
## Destinations collected so far.
dests=()
##
## ``backup`` command: mirror local directories to every destination
## given with ``-d``.
##
## Must be run as root. The output of the run is prefixed with a date
## and sent to the log through ``log_tee``, or ``log_file`` when ``-q``
## is given.
##
## The leading ``: :posarg:``, ``: :optval:`` and ``: :optfla:`` lines
## are no-ops for bash (``:`` builtin).
## NOTE(review): presumably the cmdline library reads them as the
## declaration of the arguments, options and help text -- confirm.
##
cmdline.spec::cmd:backup:run() {
# usage="usage: $exname -d DEST1 [-d DEST2 [...]] [-u USER] [DIR1 [DIR2 ...]]
# Preserve as much as possible the source structure, keeping hard-links, acl,
# exact numerical uids and gids, and being able to resume in very large files.
# "
: :posarg: [DIR...] 'Local directories that should be mirrored
on destination(s).
Examples: /etc /home /var/backups
If no directories are provided, the config
file root entries will be used all as
destination to copy.'
: :optval: -d,--dest 'Can be repeated. Specifies host
destination towards which files will be
send. Note that you can specify port
number after a colon and a bandwidth limit
for rsync after a '/'.
Examples: -d liszt.musicalta:10022
-d 10.8.0.19/200'
: :optval: -u,--user "(default: 'rsync')
Local AND destination system user to log
as at both ends to transfer file. This
local user need to have a no password ssh
login to it's own account on destination.
This destination account should have full
permissions access without passwd to write
with rsync-server in the destination
directory."
: :optval: -h,--hostname "(default is taken of the hostname file)
Set the destination store, this is the
name of the directory where the files
will all directories will be copied.
Beware ! if 2 hosts use the same store,
this means they'll conflictingly update
the same destination directory. Only
use this if you know what you are
doing."
: :optfla: -q,--quiet "Prevent output on stderr. Please note that
output is always written in log file."
## root only
[ "$UID" != "0" ] && echo "You must be root." && exit 1
## the store name defaults to the output of ``hostname``
[ -n "$opt_hostname" ] || opt_hostname=$(hostname)
## -q: write to the log file only, otherwise also echo what is logged
if [ -n "$opt_quiet" ]; then
log_facility=log_file
else
log_facility=log_tee
fi
if [ -z "$opt_hostname" ]; then
err "Couldn't figure a valid hostname. Please specify one with \`\`-h STORENAME\`\`."
return 1
fi
user=${opt_user:-rsync}
config_file="/etc/$exname/config.yml"
## Resolve the source directories. stdout and stderr are swapped around
## the log facility, then swapped back: the messages written on stderr
## go through the log facility while the list written on stdout is what
## ``array_read-0`` receives.
array_read-0 DIR < <(
{
{
mirror-dir:_get_sources "${DIR[@]}"
} 3>&1 1>&2 2>&3 | "$log_facility"
} 3>&1 1>&2 2>&3
)
if [ "${#DIR[@]}" == 0 ]; then
err "You must specify at least one source directory to mirror" \
"on command line (or in a config file)."
## NOTE(review): ``usage`` is only assigned in the commented-out lines
## at the top of this function, so this prints an empty line unless it
## is set elsewhere.
echo "$usage" >&2
exit 1
fi
info "Source directories are: ${DIR[@]}" 2>&1 | "$log_facility"
if [ "${#dests[@]}" == 0 ]; then
err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
echo "$usage" >&2
return 1
fi
## XXXvlab: note that we use here a special version of awk supporting
## ``strftime``. This is only to prefix a date to the logs. Yes, we know
## about ``--out-format`` and its ``%t`` which would be ideal, but it
## doesn't output proper UTC time (it is system time, no timezone info).
## The destinations are joined in one space separated argument, which
## ``mirror-dir:run`` splits again.
mirror-dir:run "$opt_hostname" "${dests[*]}" "${DIR[@]}" 2>&1 |
awk -W interactive '{ print strftime("%Y-%m-%d %H:%M:%S%z"), $0 }' |
"$log_facility"
}
## NOTE(review): presumably declares the standard GNU-style options of
## the ``report`` command -- confirm against the cmdline library.
cmdline.spec.gnu report
## ``report`` command: print the sessions summary produced by
## ``mirror-dir:report``. Takes no argument.
cmdline.spec::cmd:report:run() {
mirror-dir:report
}
## ``-d``/``--dest`` handler of the ``check`` command: store the given
## value as one more destination in the global ``dests`` array.
cmdline.spec:check:valued:-d,--dest:run() {
    local value="$1"
    dests+=("$value")
}
cmdline.spec.gnu check
##
## Print, for each session listed in ``bad_sessions``, the last 5 lines
## of its fail log, or a note when it has none.
##
## Reads ``bad_sessions``, ``sessions`` and ``state_dir`` as set by the
## ``check`` command that calls it.
##
mirror-dir:_print_fail_logs() {
    local id
    for id in "${bad_sessions[@]}"; do
        if [ -e "${state_dir}/$id-fail" ]; then
            echo " ${sessions[$id]}:"
            ## only keep the date and the message of each entry
            tail -n 5 "${state_dir}/$id-fail" | cut -f 1,2,5- -d " " | sed -r "s/^/ /g"
            echo
        else
            echo " ${sessions[$id]}: no fail log available"
        fi
    done
}
##
## ``check`` command: report the destination/source pairs that have no
## successful sync recorded within the given time span.
##
## Must be run as root. Returns 0 when every pair is fine. Otherwise
## the report is printed and 1 is returned or, with ``-m``, the report
## is mailed to the recipients listed in $MAIL_DESTS (read from
## /etc/default/alerting) and the status of ``mail`` is returned.
##
## The leading ``: :posarg:``, ``: :optval:`` and ``: :optfla:`` lines
## are no-ops for bash (``:`` builtin).
## NOTE(review): presumably the cmdline library reads them as the
## declaration of the arguments, options and help text -- confirm.
##
cmdline.spec::cmd:check:run() {
    # usage="usage: $exname -d DEST1 [-d DEST2 [...]] [DIR1 [DIR2 ...]]
    # Checks that mirror-dir did its job. Will send an email if not.
    # "
    : :posarg: [DIR...] 'Local directories that should be mirrored
on destination(s).
Examples: /etc /home /var/backups
If no directories are provided, the config
file root entries will be used all as
destination to copy.'
    : :optval: -d,--dest 'Can be repeated. Specifies host
destination towards which files will be
send. Note that you can specify port
number after a colon and a bandwidth limit
for rsync after a '/'.
Examples: -d liszt.musicalta:10022
-d 10.8.0.19/200'
    : :optval: -n,--time-spec "Give a full English time spec about how
old the last full run of rsync should
be at most. Defaults to '12 hours'.
Examples: -n '12 hours'
-n '1 day'"
    : :optfla: -m,--mail-alert "Send alert via email. This is intended to
use in cron."
    local d dest session_id f m
    local time_spec dir_max_len dest_max_len
    local -a msg=() bad_sessions=()
    local -A sessions=()
    [ "$UID" != "0" ] && echo "You must be root." && exit 1
    if [ "${#dests[@]}" == 0 ]; then
        err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
        ## NOTE(review): ``usage`` is only assigned in the commented-out
        ## lines above, so this prints an empty line unless it is set
        ## elsewhere.
        echo "$usage" >&2
        return 1
    fi
    if [ -n "$opt_mail_alert" ]; then
        CHECK_DEFAULT_SOURCE=/etc/default/alerting
        [ -f "$CHECK_DEFAULT_SOURCE" ] && . "$CHECK_DEFAULT_SOURCE"
        if [ "${#MAIL_DESTS[@]}" == 0 ]; then
            echo "You must set at least one recipient destination for mails." >&2
            echo " You can do that in '$CHECK_DEFAULT_SOURCE', using the variable" >&2
            echo " '\$MAIL_DESTS'. Note this is a bash array variable." >&2
            exit 1
        fi
    fi
    array_read-0 DIR < <(mirror-dir:_get_sources "${DIR[@]}")
    if [ "${#DIR[@]}" == 0 ]; then
        err "You must specify at least one source directory to mirror" \
            "on command line (or in a config file)."
        echo "$usage" >&2
        exit 1
    fi
    time_spec="${opt_time_spec:-12 hours}"
    ## left global on purpose: ``get_ids`` reads it
    state_dir=/var/run/mirror-dir
    ## Getting max string length of source
    dir_max_len=0
    for d in "${DIR[@]}"; do
        [ "$dir_max_len" -lt "${#d}" ] &&
            dir_max_len="${#d}"
    done
    ## Getting max string length of dests
    dest_max_len=0
    for d in "${dests[@]}"; do
        [ "$dest_max_len" -lt "${#d}" ] &&
            dest_max_len="${#d}"
    done
    for dest in "${dests[@]}"; do
        ## The bandwidth limit is not part of the session id; nothing is
        ## transferred here, so it is simply dropped.
        dest="${dest%/*}"
        for d in "${DIR[@]}"; do
            ## same computation as in ``mirror-dir:run``
            session_id="$(echo "$dest$d" | md5_compat)"
            session_id="${session_id:1:8}"
            sessions["$session_id"]="$dest $d"
            ## the success file, only if modified within the time span
            f=$(find "$state_dir" \
                     -maxdepth 1 -newermt "-$time_spec" \
                     -type f -name "${session_id}-success")
            if [ -z "$f" ]; then
                if [ -e "$state_dir/${session_id}-success" ]; then
                    msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s last full sync %s" \
                        "$dest" "$d" \
                        "$(stat -c %y "$state_dir/${session_id}-success" |
                            sed -r 's/\.[0-9]{9,9} / /g')")")
                else
                    msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s never finished yet" \
                        "$dest" "$d")")
                fi
                bad_sessions+=("$session_id")
            fi
        done
    done
    [ "${#msg[@]}" == 0 ] && return 0
    if [ -z "$opt_mail_alert" ]; then
        echo
        echo "${DARKRED}These destination/source directory were" \
            "last synced more than $time_spec ago:${NORMAL}"
        for m in "${msg[@]}"; do
            printf " %s\n" "$m"
        done
        echo
        echo "${DARKRED}Last failed logs:${NORMAL}"
        mirror-dir:_print_fail_logs
        return 1
    fi
    ##
    ## Mail
    ##
    mail -s "[$(hostname)] mirror backup failing" "${MAIL_DESTS[@]}" <<EOF
Hi,
Some configured mirroring targets have not finished gracefully in
the last $time_spec. Please see for yourself:
$(
for m in "${msg[@]}"; do
echo " $m"
done
)
You might want to find these following information of some use:
$(mirror-dir:_print_fail_logs)
Hoping all this will help you sort out the issue...
Yours sincerly,
--
mirror-dir-check
PS: You received this email because your email is listed in
\$MAIL_DESTS of '$CHECK_DEFAULT_SOURCE' of '$(hostname)'
(also known as $(cat /etc/mailname)).
EOF
}
## Entry point: hand the command line arguments to the cmdline library.
## NOTE(review): presumably parses the options and runs the matching
## ``cmdline.spec::cmd:*:run`` function -- confirm.
cmdline::parse "$@"