#!/bin/bash

##
## Mirror local directories to distant hosts with rsync over ssh.
##
## Here's an example crontab:
##
##   SHELL=/bin/sh
##   PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
##
##   49 */2 * * * root mirror-dir backup -d core-05.0k.io:10023 -u rsync /etc /home /opt/apps 2>&1 | logger -t mirror-dir
##

#:-
. /etc/shlib
#:-

include common
include parse
include process
include cmdline
include array

depends shyaml lock


##
## Functions
##

## Log file written by the ``backup`` command and parsed by ``report``.
MIRROR_DIR_LOG=/var/log/mirror-dir.log
## ``report`` only looks at this many trailing lines of the log.
MIRROR_DIR_REPORT_MAX_READ_LINE=1000000
## Extended regex matching the ``%Y-%m-%d %H:%M:%S%z`` prefix of log lines.
R_DATE='[0-9]{4,4}-[01][0-9]-[0-3][0-9] [012][0-9]:[0-5][0-9]:[0-5][0-9][+-][01][0-9][0-5][0-9]'
|
|
|
|
## Print one summary line per rsync session found in the last
## $MIRROR_DIR_REPORT_MAX_READ_LINE lines of $MIRROR_DIR_LOG.
##
## Every "Starting rsync: SRC -> HOST" log line is joined with the
## "sent N bytes received N bytes N bytes/sec" line that follows it.
## Output columns: start date, host, source, sent, received, rate (the
## last three made human readable by numfmt) and duration.
##
## Globals read: MIRROR_DIR_LOG, MIRROR_DIR_REPORT_MAX_READ_LINE, R_DATE
## Requires: ``print_duration`` (not defined in this file) and GNU
## date/sed/numfmt.
mirror-dir:report() {
    local s1 s2 d1 d2 host source sent received rate
    local s s_s d_s duration

    while read -r s1 s2 d1 d2 host source sent received rate; do
        s=$(date -d"$s1 $s2" --rfc-3339=seconds)
        s_s=$(date -d"$s1 $s2" +%s)
        d_s=$(date -d"$d1 $d2" +%s)
        duration=$((d_s - s_s))
        ## The literal " | " column keeps field numbering stable for
        ## the numfmt calls below; it is removed by the final sed.
        printf "%s %-15s %-30s | %s %s %s %10s\n" \
               "$s" "$host" "$source" "$sent" "$received" "$rate" \
               "$(print_duration "$duration")"
    done < <(
        tail -n "$MIRROR_DIR_REPORT_MAX_READ_LINE" -- "$MIRROR_DIR_LOG" |
            grep -E "^${R_DATE} (Starting|sent)" |
            ## Drop the trailing "(date)", turn "<label> 1,234 bytes" into
            ## "<label>:1234" and label the unlabelled last number "rate:".
            sed -r -e 's/\s*\(.*\)$//g' \
                   -e 's/ (([0-9]{1,3},)*[0-9]{1,3})(\.[0-9]{2,2})? bytes(\/sec)?/:\1/g' \
                   -e 's/,//g' \
                   -e 's/ :([0-9]+)$/ rate:\1/g' |
            grep -v "^--$" |
            ## Join each "Starting" line with the line following it.
            sed -r -e '/Starting/N' \
                   -e "s/\n(${R_DATE} )(.*)sent/ \1 sent/g" |
            ## Reorder to: start-date end-date host source, strip labels.
            sed -r -e "s/^(${R_DATE} )Starting rsync: ([^ ]+) -> ([^ ]+) (${R_DATE} )/\1\4\3 \2/g" \
                   -e 's/ +/ /g' \
                   -e 's/ [a-z]+:/ /g' |
            grep -E "^${R_DATE} ${R_DATE} [^ ]+ /[^ ]+ [0-9]+ [0-9]+ [0-9]+$"
    ) |
        numfmt --field=6,7 --to=iec-i --suffix=B --padding=8 |
        numfmt --field=8 --to=iec-i --suffix=B/s --padding=10 |
        sed -r 's/ \| / /g'
}
|
|
|
|
|
|
## Mirror every source directory to every destination with rsync over ssh.
##
## Arguments:
##   $1     store name: files are sent to /var/mirror/$1 on the destination
##   $2     space separated destinations, each ``HOST[:PORT][/BWLIMIT]``
##   $3...  local source directories
##
## Globals read: user, exname, RSYNC_OPTIONS, SSH_OPTIONS
## Globals written: state_dir
## Uses helpers that are not defined in this file: md5_compat, warn,
## read-0, p0 and lock.
##
## Each (destination, source) pair has a session id derived from
## ``HOST[:PORT]`` and the source path; the outcome of each session is
## appended to ``$state_dir/<session_id>-success`` or ``-fail``.
## A failing rsync is attempted up to 3 times; exit codes 20, 137 and 143
## (signals) abort the remaining sources of the current destination.
mirror-dir:run() {
    local hostname="$1" dests="$2" source_dirs
    shift 2

    local dest_path rsync_options ssh_options
    local dest dest_for_session dest_rsync_options dest_ssh_options
    local d dir dirpath session_id lock_label
    local current_rsync_options tmp_exclude_patterns has_exclude_pattern
    local exclude_dir cmd start retry errlvl errlvls

    dests=($dests) ## individual dests can't use any space-like separators
    source_dirs=("$@")

    dest_path=/var/mirror/$hostname
    ## Deliberately left global: ``get_ids`` reads ``$state_dir``.
    state_dir=/var/run/mirror-dir
    mkdir -p "$state_dir"
    ## Unquoted on purpose: both variables hold space separated options.
    rsync_options=(
        ${RSYNC_OPTIONS:-} --stats --out-format='%i %o %f %l %b')
    ssh_options=(${SSH_OPTIONS:--o StrictHostKeyChecking=no})

    for dest in "${dests[@]}"; do
        dest_rsync_options=("${rsync_options[@]}")
        if [[ "$dest" == *"/"* ]]; then
            dest_rsync_options+=("--bwlimit" "${dest##*/}")
            dest="${dest%/*}"
        fi
        ## Session ids are computed on ``HOST[:PORT]`` (port included).
        dest_for_session="$dest"

        ## The port goes in a per-destination copy of the ssh options,
        ## so that it can't leak into the following destinations.
        dest_ssh_options=("${ssh_options[@]}")
        if [[ "$dest" == *":"* ]]; then
            dest_ssh_options+=("-p" "${dest#*:}")
            dest="${dest%%:*}"
        fi

        for d in "${source_dirs[@]}"; do

            current_rsync_options=("${dest_rsync_options[@]}")

            session_id="$(echo "${dest_for_session}$d" | md5_compat)"
            session_id="${session_id:1:8}"

            dirpath="$(dirname "$d")"
            if [ "$dirpath" == "/" ]; then
                dir="/$(basename "$d")"
            else
                dir="$dirpath/$(basename "$d")"
            fi

            [ -d "$dir" ] || {
                warn "ignoring '$dir' as it is not existing."
                continue
            }

            lock_label=$exname-$hostname-${session_id}

            ## Unpredictable name: this runs as root and writes in /tmp.
            tmp_exclude_patterns=$(
                mktemp "/tmp/${lock_label}.exclude_patterns.XXXXXX") || {
                warn "ignoring '$dir': could not create temporary exclude file."
                continue
            }
            ## Adding the base of the dir if required... seems necessary with
            ## the rsync option that replicate the full path.
            has_exclude_pattern=
            while read-0 exclude_dir; do
                if [ -z "$has_exclude_pattern" ]; then
                    echo "Adding exclude patterns for source '$dir':" >&2
                    has_exclude_pattern=1
                fi
                if [[ "$exclude_dir" == "/"* ]]; then
                    exclude_dir="$dir${exclude_dir}"
                fi
                echo " - $exclude_dir" >&2
                p0 "$exclude_dir"
            done < <(get_exclude_patterns "$dir") > "$tmp_exclude_patterns"
            if [ -n "$has_exclude_pattern" ]; then
                ## ``-0``: patterns in the file are NUL separated
                current_rsync_options+=("-0" "--exclude-from"="$tmp_exclude_patterns")
            else
                echo "No exclude patterns for '$dir'."
            fi
            echo ---------------------------------
            ## The ``report`` command parses this exact line format.
            echo "Starting rsync: $d -> $dest ($(date))"
            cmd=(
                nice -n 15
                rsync "${current_rsync_options[@]}" -azvARH
                -e "sudo -u $user ssh ${dest_ssh_options[*]}"
                --delete --delete-excluded
                --partial --partial-dir .rsync-partial
                --numeric-ids "$dir/" "$user@$dest":"$dest_path"
            )
            echo "${cmd[@]}"
            start="$SECONDS"
            retry=1
            errlvls=()
            while true; do
                lock "$lock_label" -v -D -k -- "${cmd[@]}"
                errlvl="$?"
                case "$errlvl" in
                    20) ## Received SIGUSR1, SIGINT
                        echo "!! Rsync received SIGUSR1 or SIGINT."
                        echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-fail" \
                                    "$dest $d $((SECONDS - start)) signal SIGUSR1, SIGINT or SIGHUP"
                        rm -f -- "$tmp_exclude_patterns"
                        ## leaves the retry loop AND the source loop
                        break 2
                        ;;
                    137|143) ## killed SIGKILL, SIGTERM
                        echo "!! Rsync received $(kill -l "$errlvl")"
                        echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-fail" \
                                    "$dest $d $((SECONDS - start)) signal: $(kill -l "$errlvl")"
                        rm -f -- "$tmp_exclude_patterns"
                        ## leaves the retry loop AND the source loop
                        break 2
                        ;;
                    0)
                        echo "Rsync finished with success $d -> $dest in $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-success" \
                                    "$dest $d $((SECONDS - start)) OK"
                        break
                        ;;
                    *)
                        errlvls+=("$errlvl")
                        echo "!! Rsync failed with an errorlevel $errlvl after $((SECONDS - start))s since start."
                        if [ "$retry" -lt 3 ]; then
                            echo "!! Triggering a retry ($((++retry))/3)"
                            continue
                        else
                            echo "!! Tried 3 times, bailing out."
                            echo " .. interruption of $d -> $dest after $((SECONDS - start))s"
                            append_trim "${state_dir}/${session_id}-fail" \
                                        "$dest $d $((SECONDS - start))" \
                                        "Failed after 3 retries (errorlevels: ${errlvls[*]})"
                            break
                        fi
                        ;;
                esac
            done
            if [ -n "$has_exclude_pattern" ]; then
                rm -fv -- "$tmp_exclude_patterns"
            else
                ## empty file: remove it silently instead of leaving it behind
                rm -f -- "$tmp_exclude_patterns"
            fi
        done
    done

}
|
|
|
|
|
|
|
|
## Output the NUL separated exclude patterns configured for a source dir.
##
## Arguments:
##   $1  source directory, used as top-level key in the config file
##
## Globals read: config_file
##
## Looks up the ``<dir>.exclude`` key with shyaml. Dots in the directory
## name are backslash-escaped, as shyaml would otherwise take them as key
## separators. Outputs nothing if the config file does not exist; shyaml
## errors (for instance a missing key) are silenced on purpose.
get_exclude_patterns() {
    local dir="$1"
    [ -e "$config_file" ] || return
    shyaml get-values-0 "${dir//./\\.}.exclude" < "$config_file" 2>/dev/null
}
|
|
|
|
## Append a dated line to a file and keep only its last 5000 lines.
##
## Arguments:
##   $1     file to append to
##   $2...  words of the message (joined with spaces)
##
## The line is "<rfc-3339 date> <message>". Trimming goes through
## ``<file>.tmp`` which is then moved over the file.
## Uses ``e``, a helper that is not defined in this file.
append_trim() {
    local f="$1"
    shift
    e "$(date --rfc-3339=s) $*"$'\n' >> "$f" &&
        tail -n 5000 -- "$f" > "$f".tmp &&
        mv -- "$f".tmp "$f"
}
|
|
|
|
|
|
## Copy stdin to stdout while appending it to $MIRROR_DIR_LOG.
log_tee() {
    tee -a "$MIRROR_DIR_LOG"
}
|
|
## Append stdin to $MIRROR_DIR_LOG, printing nothing.
log_file() {
    cat >> "$MIRROR_DIR_LOG"
}
|
|
|
|
|
|
## Print, one per line and without duplicates, the session ids that have
## a ``-fail`` or ``-success`` file in $state_dir.
##
## Globals read: state_dir
get_ids() {
    local file session_id
    local -A id_done=()
    for file in "$state_dir"/*{-fail,-success}; do
        ## A glob without any match is left as-is by bash: skip it,
        ## otherwise a bogus "*" id would be printed.
        [ -e "$file" ] || continue
        session_id=${file%-*}
        [ "${id_done["$session_id"]}" ] && continue
        id_done["$session_id"]=1
        echo "${session_id##*/}"
    done
}
|
|
|
|
|
|
## Output the list of source directories to mirror.
##
## Arguments:
##   $@  source directories given on the command line (may be empty)
##
## Globals read: exname
## Globals written: config_file (set to /etc/$exname/config.yml)
##
## When no directory is given and the config file exists, the values of
## its ``default.sources`` key are used instead. The resulting list is
## written on stdout by ``array_values_to_stdin``; that helper and ``info``
## are not defined in this file.
mirror-dir:_get_sources() {
    local DIR=("$@")

    config_file="/etc/$exname/config.yml"

    if [ "${#DIR[@]}" == 0 ] && [ -e "$config_file" ]; then
        info "No source provided on command line," \
             "reading '$config_file' for default sources"
        ## NOTE(review): ``eval`` runs the content of the config file
        ## through the shell (presumably to allow globs or variables in
        ## source paths). This is only acceptable as long as the config
        ## file is writable by trusted users only.
        DIR=($(eval echo $(shyaml get-values default.sources < "$config_file")))
    fi
    array_values_to_stdin DIR
}
|
|
|
|
|
|
## Flag the case where this file is sourced rather than executed.
if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
    SOURCED=true
fi

## Program metadata (NOTE(review): presumably read by the cmdline
## library to build the help and version output; confirm).
version=0.1
desc='Manage mirroring of local directory to distant hosts'
help=""
|
|
|
|
|
|
|
|
##
## Code
##
|
|
|
|
|
|
|
|
## NOTE(review): ``cmdline.spec.gnu`` is not defined in this file;
## presumably it declares the main command, then the ``backup``
## sub-command, to the cmdline library. Confirm against that library.
cmdline.spec.gnu

cmdline.spec.gnu backup
|
|
|
|
## Handler for the ``-d``/``--dest`` option of ``backup``: each occurrence
## appends its value ($1) to the global ``dests`` array.
cmdline.spec:backup:valued:-d,--dest:run() {
    dests+=("$1")
}

## Destinations collected from the command line.
dests=()
|
|
|
|
## ``backup`` command: mirror the source directories to all destinations.
##
## Globals read: DIR, dests, opt_hostname, opt_user, opt_quiet, exname
## Globals written: user, config_file, log_facility, DIR, opt_hostname
##
## Must be run as root. Sources come from the command line or, failing
## that, from the config file (see ``mirror-dir:_get_sources``). All the
## output of the run is prefixed with a date and sent to the log file,
## and also to the terminal unless ``-q`` is given.
##
## The ``: :posarg:``/``: :optval:``/``: :optfla:`` lines are no-ops for
## bash (NOTE(review): presumably parsed by the cmdline library as the
## option specification; confirm). Their help texts keep their line
## breaks flush-left so that their content is left untouched.
cmdline.spec::cmd:backup:run() {

    # usage="usage: $exname -d DEST1 [-d DEST2 [...]] [-u USER] [DIR1 [DIR2 ...]]

    # Preserve as much as possible the source structure, keeping hard-links, acl,
    # exact numerical uids and gids, and being able to resume in very large files.

    # "

    : :posarg: [DIR...] 'Local directories that should be mirrored
on destination(s).

Examples: /etc /home /var/backups

If no directories are provided, the config
file root entries will be used all as
destination to copy.'

    : :optval: -d,--dest 'Can be repeated. Specifies host
destination towards which files will be
send. Note that you can specify port
number after a colon and a bandwidth limit
for rsync after a '/'.

Examples: -d liszt.musicalta:10022
-d 10.8.0.19/200'


    : :optval: -u,--user "(default: 'rsync')

Local AND destination system user to log
as at both ends to transfer file. This
local user need to have a no password ssh
login to it's own account on destination.
This destination account should have full
permissions access without passwd to write
with rsync-server in the destination
directory."

    : :optval: -h,--hostname "(default is taken of the hostname file)

Set the destination store, this is the
name of the directory where the files
will all directories will be copied.
Beware ! if 2 hosts use the same store,
this means they'll conflictingly update
the same destination directory. Only
use this if you know what you are
doing."

    : :optfla: -q,--quiet "Prevent output on stderr. Please note that
output is always written in log file."


    if [ "$UID" != "0" ]; then
        echo "You must be root." >&2
        exit 1
    fi

    [ -n "$opt_hostname" ] || opt_hostname=$(hostname)

    if [ -n "$opt_quiet" ]; then
        log_facility=log_file
    else
        log_facility=log_tee
    fi

    if [ -z "$opt_hostname" ]; then
        err "Couldn't figure a valid hostname. Please specify one with \`\`-h STORENAME\`\`."
        return 1
    fi

    user=${opt_user:-rsync}

    config_file="/etc/$exname/config.yml"

    ## stdout and stderr are swapped twice: what the function writes on
    ## stderr goes through the log facility, while the list it writes
    ## on stdout is what ``array_read-0`` receives.
    array_read-0 DIR < <(
        {
            {
                mirror-dir:_get_sources "${DIR[@]}"
            } 3>&1 1>&2 2>&3 | "$log_facility"
        } 3>&1 1>&2 2>&3
    )

    if [ "${#DIR[@]}" == 0 ]; then
        err "You must specify at least one source directory to mirror" \
            "on command line (or in a config file)."
        echo "$usage" >&2
        exit 1
    fi
    info "Source directories are: ${DIR[*]}" 2>&1 | "$log_facility"

    if [ "${#dests[@]}" == 0 ]; then
        err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
        echo "$usage" >&2
        return 1
    fi

    ## XXXvlab: note that we use here a special version of awk supporting
    ## ``strftime``. This is only to prefix a date to the logs. Yes, we know
    ## about ``--out-format`` and its ``%t`` which would be ideal, but it
    ## doesn't output proper UTC time (it is system time, no timezone info).
    mirror-dir:run "$opt_hostname" "${dests[*]}" "${DIR[@]}" 2>&1 |
        awk -W interactive '{ print strftime("%Y-%m-%d %H:%M:%S%z"), $0 }' |
        "$log_facility"

}
|
|
|
|
|
|
|
|
## ``report`` command: print the summary of the logged rsync sessions
## (see ``mirror-dir:report``).
cmdline.spec.gnu report
cmdline.spec::cmd:report:run() {
    mirror-dir:report
}
|
|
|
|
|
|
|
|
## Handler for the ``-d``/``--dest`` option of ``check``: each occurrence
## appends its value ($1) to the global ``dests`` array.
cmdline.spec:check:valued:-d,--dest:run() {
    dests+=("$1")
}
|
|
|
|
## Private helper of the ``check`` command: for each session id listed in
## ``bad_sessions``, print its "DEST DIR" label (from ``sessions``) and the
## last 5 entries of its fail log in ``$state_dir``, if there is one.
## All three variables are read from the caller's scope.
mirror-dir:_print_fail_logs() {
    local m
    for m in "${bad_sessions[@]}"; do
        if [ -e "${state_dir}"/$m-fail ]; then
            echo " ${sessions[$m]}:"
            ## keep the date and the message, drop dest, dir and duration
            tail -n 5 "${state_dir}"/$m-fail | cut -f 1,2,5- -d " " | sed -r "s/^/ /g"
            echo
        else
            echo " ${sessions[$m]}: no fail log available"
        fi
    done
}

## ``check`` command: report the (destination, source) pairs whose last
## successful sync is older than the given time spec.
##
## Globals read: DIR, dests, opt_time_spec, opt_mail_alert, MAIL_DESTS
## Globals written: state_dir, CHECK_DEFAULT_SOURCE, DIR
##
## A pair is considered late when ``$state_dir/<session_id>-success`` is
## missing or was not modified within the time spec; session ids are
## computed the same way as in ``mirror-dir:run``.
## Without ``-m`` the report is printed and 1 is returned; with ``-m`` it
## is mailed to the recipients of ``$MAIL_DESTS``. Returns 0 when
## everything is up to date.
##
## The ``: :posarg:``/``: :optval:``/``: :optfla:`` lines are no-ops for
## bash (NOTE(review): presumably parsed by the cmdline library as the
## option specification; confirm). Their help texts keep their line
## breaks flush-left so that their content is left untouched.
cmdline.spec.gnu check
cmdline.spec::cmd:check:run() {

    # usage="usage: $exname -d DEST1 [-d DEST2 [...]] [DIR1 [DIR2 ...]]

    # Checks that mirror-dir did it's job. Will send an email if not.
    # "


    : :posarg: [DIR...] 'Local directories that should be mirrored
on destination(s).

Examples: /etc /home /var/backups

If no directories are provided, the config
file root entries will be used all as
destination to copy.'

    : :optval: -d,--dest 'Can be repeated. Specifies host
destination towards which files will be
send. Note that you can specify port
number after a colon and a bandwidth limit
for rsync after a '/'.

Examples: -d liszt.musicalta:10022
-d 10.8.0.19/200'

    : :optval: -n,--time-spec "Give a full English time spec about how
old the last full run of rsync should
be at most. Defaults to '12 hours'.

Examples: -n '12 hours'
-n '1 day'"

    : :optfla: -m,--mail-alert "Send alert via email. This is intended to
use in cron."


    local time_spec dir_max_len dest_max_len d dest session_id f m
    local bad_sessions msg

    if [ "$UID" != "0" ]; then
        echo "You must be root." >&2
        exit 1
    fi

    if [ "${#dests[@]}" == 0 ]; then
        err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
        echo "$usage" >&2
        return 1
    fi

    if [ -n "$opt_mail_alert" ]; then
        CHECK_DEFAULT_SOURCE=/etc/default/alerting
        [ -f "$CHECK_DEFAULT_SOURCE" ] && . "$CHECK_DEFAULT_SOURCE"

        if [ "${#MAIL_DESTS[@]}" == 0 ]; then
            echo "You must set at least one recipient destination for mails." >&2
            echo " You can do that in '$CHECK_DEFAULT_SOURCE', using the variable" >&2
            echo " '\$MAIL_DESTS'. Note this is a bash array variable." >&2
            exit 1
        fi
    fi

    array_read-0 DIR < <(mirror-dir:_get_sources "${DIR[@]}")

    if [ "${#DIR[@]}" == 0 ]; then
        err "You must specify at least one source directory to mirror" \
            "on command line (or in a config file)."
        echo "$usage" >&2
        exit 1
    fi

    time_spec="${opt_time_spec:-12 hours}"

    state_dir=/var/run/mirror-dir

    ## Getting max string length of source
    dir_max_len=0
    for d in "${DIR[@]}"; do
        [ "$dir_max_len" -lt "${#d}" ] &&
            dir_max_len="${#d}"
    done

    ## Getting max string length of dests
    dest_max_len=0
    for d in "${dests[@]}"; do
        [ "$dest_max_len" -lt "${#d}" ] &&
            dest_max_len="${#d}"
    done

    declare -A sessions=()
    bad_sessions=()
    msg=()
    for dest in "${dests[@]}"; do
        ## The bandwidth limit is not part of the session id.
        if [[ "$dest" == *"/"* ]]; then
            dest="${dest%/*}"
        fi

        for d in "${DIR[@]}"; do
            session_id="$(echo "$dest$d" | md5_compat)"
            session_id="${session_id:1:8}"
            sessions["$session_id"]="$dest $d"
            ## Non-empty only if the success file is recent enough.
            f=$(find "$state_dir" \
                     -maxdepth 1 -newermt "-$time_spec" \
                     -type f -name "${session_id}-success")
            if [ -z "$f" ]; then
                if [ -e "$state_dir/${session_id}-success" ]; then
                    msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s last full sync %s" \
                                    "$dest" "$d" \
                                    "$(stat -c %y "$state_dir/${session_id}-success" |
                                           sed -r 's/\.[0-9]{9,9} / /g')")")
                else
                    msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s never finished yet" \
                                    "$dest" "$d")")
                fi
                bad_sessions+=("$session_id")
            fi
        done
    done

    [ "${#msg[@]}" == 0 ] && return 0

    if [ -z "$opt_mail_alert" ]; then
        echo
        echo "${DARKRED}These destination/source directory were" \
             "last synced more than $time_spec ago:${NORMAL}"
        for m in "${msg[@]}"; do
            printf " %s\n" "$m"
        done
        echo
        echo "${DARKRED}Last failed logs:${NORMAL}"
        mirror-dir:_print_fail_logs
        return 1
    fi


    ##
    ## Mail
    ##

    cat <<EOF | mail -s "[$(hostname)] mirror backup failing" "${MAIL_DESTS[@]}"
Hi,

Some configured mirroring targets have not finished gracefully in
the last $time_spec. Please see for yourself:

$(
    for m in "${msg[@]}"; do
        echo " $m"
    done
)

You might want to find these following information of some use:

$(
    mirror-dir:_print_fail_logs
)

Hoping all this will help you sort out the issue...

Yours sincerly,
--
mirror-dir-check

PS: You received this email because your email is listed in
\$MAIL_DESTS of '$CHECK_DEFAULT_SOURCE' of '$(hostname)'
(also known as $(cat /etc/mailname)).

EOF

}
|
|
|
|
|
|
|
|
|
|
## Entry point: hand the command-line arguments over to ``cmdline::parse``.
## NOTE(review): this function is not defined in this file; presumably it
## comes from ``include cmdline`` and dispatches to the
## ``cmdline.spec::cmd:*:run`` functions declared above. Confirm.
cmdline::parse "$@"
|