#!/bin/bash ## ## Here's an example crontab: ## ## SHELL=/bin/sh ## PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin ## ## 49 */2 * * * root mirror-dir run -d core-05.0k.io:10023 -u rsync /etc /home /opt/apps 2>&1 | logger -t mirror-dir ## #:- . /etc/shlib #:- include common include parse include process include cmdline include array depends shyaml lock ## ## Functions ## MIRROR_DIR_LOG=/var/log/mirror-dir.log MIRROR_DIR_REPORT_MAX_READ_LINE=1000000 R_DATE='[0-9]{4,4}-[01][0-9]-[0-3][0-9] [012][0-9]:[0-5][0-9]:[0-5][0-9][+-][01][0-9][0-5][0-9]' mirror-dir:report() { local s1 s2 s3 d1 d2 d3 host source sent received rate while read s1 s2 d1 d2 host source sent received rate; do s=$(date -d"$s1 $s2" --rfc-3339=seconds) s_s=$(date -d"$s1 $s2" +%s) d_s=$(date -d"$d1 $d2" +%s) duration=$((d_s - s_s)) printf "%s %-15s %-30s | %s %s %s %10s\n" \ "$s" "$host" "$source" "$sent" "$received" "$rate" "$(print_duration "$duration")" done < <( tail "$MIRROR_DIR_LOG" -n "$MIRROR_DIR_REPORT_MAX_READ_LINE" | egrep "^${R_DATE} (Starting|sent)" | sed -r 's/\s*\(.*\)$//g s/ (([0-9]{1,3},)*[0-9]{1,3})(\.[0-9]{2,2})? bytes(\/sec)?/:\1/g s/,//g s/ :([0-9]+)$/ rate:\1/g' | grep -v "^--$" | sed -r "/Starting/N; {s/\n(${R_DATE} )(.*)sent/ \1 sent/g}" | sed -r "s/^(${R_DATE} )Starting rsync: ([^ ]+) -> ([^ ]+) (${R_DATE} )/\1\4\3 \2/g s/ +/ /g s/ [a-z]+:/ /g" | egrep "^${R_DATE} ${R_DATE} [^ ]+ /[^ ]+ [0-9]+ [0-9]+ [0-9]+$" ) | numfmt --field=6,7 --to=iec-i --suffix=B --padding=8 | numfmt --field=8 --to=iec-i --suffix=B/s --padding=10 | sed -r 's/ \| / /g' } mirror-dir:run() { local hostname="$1" dests="$2" source_dirs shift 2 dests=($dests) ## individual dests can't use any space-like separators source_dirs=("$@") dest_path=/var/mirror/$hostname state_dir=/var/run/mirror-dir mkdir -p "$state_dir" rsync_options=( ${RSYNC_OPTIONS:-} --stats --out-format='%i %o %f %l %b') ssh_options=(${SSH_OPTIONS:--o StrictHostKeyChecking=no}) for dest in "${dests[@]}"; do dest_rsync_options=("${rsync_options[@]}") if [[ "$dest" == *"/"* ]]; then dest_rsync_options+=("--bwlimit" "${dest##*/}") dest="${dest%/*}" fi dest_for_session="$dest" for d in "${source_dirs[@]}"; do current_rsync_options=("${dest_rsync_options[@]}") session_id="$(echo "${dest_for_session}$d" | md5_compat)" session_id="${session_id:1:8}" if [[ "$dest" == *":"* ]]; then ssh_options+=("-p" "${dest#*:}") dest="${dest%%:*}" fi dirpath="$(dirname "$d")" if [ "$dirpath" == "/" ]; then dir="/$(basename "$d")" else dir="$dirpath/$(basename "$d")" fi [ -d "$dir" ] || { warn "ignoring '$dir' as it is not existing." continue } lock_label=$exname-$hostname-${session_id} tmp_exclude_patterns=/tmp/${lock_label}.exclude_patterns.tmp ## Adding the base of the dir if required... seems necessary with ## the rsync option that replicate the full path. has_exclude_pattern= while read-0 exclude_dir; do if [ -z "$has_exclude_pattern" ]; then echo "Adding exclude patterns for source '$dir':" >&2 has_exclude_pattern=1 fi if [[ "$exclude_dir" == "/"* ]]; then exclude_dir="$dir${exclude_dir}" fi echo " - $exclude_dir" >&2 p0 "$exclude_dir" done < <(get_exclude_patterns "$dir") > "$tmp_exclude_patterns" if [ -n "$has_exclude_pattern" ]; then current_rsync_options+=("-0" "--exclude-from"="$tmp_exclude_patterns") else echo "No exclude patterns for '$dir'." fi echo --------------------------------- echo "Starting rsync: $d -> $dest ($(date))" cmd=( nice -n 15 \ rsync "${current_rsync_options[@]}" -azvARH \ -e "sudo -u $user ssh ${ssh_options[*]}" \ --delete --delete-excluded \ --partial --partial-dir .rsync-partial \ --numeric-ids "$dir/" "$user@$dest":"$dest_path" ) echo "${cmd[@]}" start="$SECONDS" retry=1 errlvls=() while true; do lock "$lock_label" -v -D -k -- "${cmd[@]}" errlvl="$?" case "$errlvl" in 20) ## Received SIGUSR1, SIGINTT echo "!! Rsync received SIGUSR1 or SIGINT." echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s" append_trim "${state_dir}/${session_id}-fail" \ "$dest $d $((SECONDS - start)) signal SIGUSR1, SIGINT or SIGHUP" break 2 ;; 137|143) ## killed SIGKILL, SIGTERM echo "!! Rsync received $(kill -l "$errlvl")" echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s" append_trim "${state_dir}/${session_id}-fail" \ "$dest $d $((SECONDS - start)) signal: $(kill -l "$errlvl")" break 2 ;; 0) echo "Rsync finished with success $d -> $dest in $((SECONDS - start))s" append_trim "${state_dir}/${session_id}-success" \ "$dest $d $((SECONDS - start)) OK" break ;; *) errlvls+=("$errlvl") echo "!! Rsync failed with an errorlevel $errlvl after $((SECONDS - start))s since start." if [ "$retry" -lt 3 ]; then echo "!! Triggering a retry ($((++retry))/3)" continue else echo "!! Tried 3 times, bailing out." echo " .. interruption of $d -> $dest after $((SECONDS - start))s" append_trim "${state_dir}/${session_id}-fail" \ "$dest $d $((SECONDS - start))" \ "Failed after 3 retries (errorlevels: ${errlvls[@]})" break fi ;; esac done if [ -n "$has_exclude_pattern" ]; then rm -fv "$tmp_exclude_patterns" fi done done } get_exclude_patterns() { local dir="$1" [ -e "$config_file" ] || return cat "$config_file" | shyaml get-values-0 "${dir//.\\./}.exclude" 2>/dev/null } append_trim() { local f="$1" shift e "$(date --rfc-3339=s) $*"$'\n' >> "$f" && tail -n 5000 "$f" > "$f".tmp && mv "$f"{.tmp,} } log_tee() { tee -a "$MIRROR_DIR_LOG"; } log_file() { cat >> "$MIRROR_DIR_LOG"; } get_ids() { local session_id id_done declare -A id_done for file in "$state_dir"/*{-fail,-success}; do session_id=${file%-*} [ "${id_done["$session_id"]}" ] && continue id_done["$session_id"]=1 echo "${session_id##*/}" done } mirror-dir:_get_sources() { local DIR=("$@") config_file="/etc/$exname/config.yml" if [ "${#DIR[@]}" == 0 ]; then if [ -e "$config_file" ]; then info "No source provided on command line," \ "reading '$config_file' for default sources" DIR=($(eval echo $(shyaml get-values default.sources < "$config_file"))) fi fi array_values_to_stdin DIR } [[ "${BASH_SOURCE[0]}" != "${0}" ]] && SOURCED=true version=0.1 desc='Manage mirroring of local directory to distant hosts' help="" ## ## Code ## cmdline.spec.gnu cmdline.spec.gnu backup cmdline.spec:backup:valued:-d,--dest:run() { dests+=("$1") } dests=() cmdline.spec::cmd:backup:run() { # usage="usage: $exname -d DEST1 [-d DEST2 [...]] [-u USER] [DIR1 [DIR2 ...]] # Preserve as much as possible the source structure, keeping hard-links, acl, # exact numerical uids and gids, and being able to resume in very large files. # " : :posarg: [DIR...] 'Local directories that should be mirrored on destination(s). Examples: /etc /home /var/backups If no directories are provided, the config file root entries will be used all as destination to copy.' : :optval: -d,--dest 'Can be repeated. Specifies host destination towards which files will be send. Note that you can specify port number after a colon and a bandwidth limit for rsync after a '/'. Examples: -d liszt.musicalta:10022 -d 10.8.0.19/200' : :optval: -u,--user "(default: 'rsync') Local AND destination system user to log as at both ends to transfer file. This local user need to have a no password ssh login to it's own account on destination. This destination account should have full permissions access without passwd to write with rsync-server in the destination directory." : :optval: -h,--hostname "(default is taken of the hostname file) Set the destination store, this is the name of the directory where the files will all directories will be copied. Beware ! if 2 hosts use the same store, this means they'll conflictingly update the same destination directory. Only use this if you know what you are doing." : :optfla: -q,--quiet "Prevent output on stderr. Please note that output is always written in log file." [ "$UID" != "0" ] && echo "You must be root." && exit 1 [ -n "$opt_hostname" ] || opt_hostname=$(hostname) if [ -n "$opt_quiet" ]; then log_facility=log_file else log_facility=log_tee fi if [ -z "$opt_hostname" ]; then err "Couldn't figure a valid hostname. Please specify one with \`\`-h STORENAME\`\`." return 1 fi user=${opt_user:-rsync} config_file="/etc/$exname/config.yml" array_read-0 DIR < <( { { mirror-dir:_get_sources "${DIR[@]}" } 3>&1 1>&2 2>&3 | "$log_facility" } 3>&1 1>&2 2>&3 ) if [ "${#DIR[@]}" == 0 ]; then err "You must specify at least one source directory to mirror" \ "on command line (or in a config file)." echo "$usage" >&2 exit 1 fi info "Source directories are: ${DIR[@]}" 2>&1 | "$log_facility" if [ "${#dests[@]}" == 0 ]; then err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)." echo "$usage" >&2 return 1 fi ## XXXvlab: note that we use here a special version of awk supporting ## ``strftime``. This is only to prefix a date to the logs. Yes, we know ## about ``--out-format`` and its ``%t`` which would be ideal, but it ## doesn't output proper UTC time (it is system time, no timezone info). mirror-dir:run "$opt_hostname" "${dests[*]}" "${DIR[@]}" 2>&1 | awk -W interactive '{ print strftime("%Y-%m-%d %H:%M:%S%z"), $0 }' | "$log_facility" } cmdline.spec.gnu report cmdline.spec::cmd:report:run() { mirror-dir:report } cmdline.spec:check:valued:-d,--dest:run() { dests+=("$1") } cmdline.spec.gnu check cmdline.spec::cmd:check:run() { # usage="usage: $exname -d DEST1 [-d DEST2 [...]] [DIR1 [DIR2 ...]] # Checks that mirror-dir did it's job. Will send an email if not. # " : :posarg: [DIR...] 'Local directories that should be mirrored on destination(s). Examples: /etc /home /var/backups If no directories are provided, the config file root entries will be used all as destination to copy.' : :optval: -d,--dest 'Can be repeated. Specifies host destination towards which files will be send. Note that you can specify port number after a colon and a bandwidth limit for rsync after a '/'. Examples: -d liszt.musicalta:10022 -d 10.8.0.19/200' : :optval: -n,--time-spec "Give a full English time spec about how old the last full run of rsync should be at most. Defaults to '12 hours'. Examples: -n '12 hours' -n '1 day'" : :optfla: -m,--mail-alert "Send alert via email. This is intended to use in cron." [ "$UID" != "0" ] && echo "You must be root." && exit 1 if [ "${#dests[@]}" == 0 ]; then err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)." echo "$usage" >&2 return 1 fi if [ -n "$opt_mail_alert" ]; then CHECK_DEFAULT_SOURCE=/etc/default/alerting [ -f "$CHECK_DEFAULT_SOURCE" ] && . "$CHECK_DEFAULT_SOURCE" if [ "${#MAIL_DESTS[@]}" == 0 ]; then echo "You must set at least one recipient destination for mails." >&2 echo " You can do that in '$CHECK_DEFAULT_SOURCE', using the variable" >&2 echo " '\$MAIL_DESTS'. Note this is a bash array variable." >&2 exit 1 fi fi array_read-0 DIR < <(mirror-dir:_get_sources "${DIR[@]}") if [ "${#DIR[@]}" == 0 ]; then err "You must specify at least one source directory to mirror" \ "on command line (or in a config file)." echo "$usage" >&2 exit 1 fi time_spec="${opt_time_spec:-12 hours}" state_dir=/var/run/mirror-dir ## Getting max string length of source dir_max_len=0 for d in "${DIR[@]}"; do [ "$dir_max_len" -lt "${#d}" ] && dir_max_len="${#d}" done ## Getting max string length of dests dest_max_len=0 for d in "${dests[@]}"; do [ "$dest_max_len" -lt "${#d}" ] && dest_max_len="${#d}" done declare -A sessions=() bad_sessions=() msg=() for dest in "${dests[@]}"; do if [[ "$dest" == *"/"* ]]; then current_rsync_options+=("--bwlimit" "${dest##*/}") dest="${dest%/*}" fi for d in "${DIR[@]}"; do session_id="$(echo "$dest$d" | md5_compat)" session_id="${session_id:1:8}" sessions["$session_id"]="$dest $d" f=$(find "$state_dir" \ -maxdepth 1 -newermt "-$time_spec" \ -type f -name "${session_id}-success") if [ -z "$f" ]; then if [ -e "$state_dir/${session_id}-success" ]; then msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s last full sync %s" \ "$dest" "$d" \ "$(stat -c %y "$state_dir/${session_id}-success" | sed -r 's/\.[0-9]{9,9} / /g')")") else msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s never finished yet" \ "$dest" "$d")") fi bad_sessions+=("$session_id") fi done done [ "${#msg[@]}" == 0 ] && return 0 if [ -z "$opt_mail_alert" ]; then echo echo "${DARKRED}These destination/source directory were" \ "last synced more than $time_spec ago:${NORMAL}" for m in "${msg[@]}"; do printf " %s\n" "$m" done echo echo "${DARKRED}Last failed logs:${NORMAL}" for m in "${bad_sessions[@]}"; do if [ -e "${state_dir}"/$m-fail ]; then echo " ${sessions[$m]}:" tail -n 5 "${state_dir}"/$m-fail | cut -f 1,2,5- -d " " | sed -r "s/^/ /g" echo else echo " ${sessions[$m]}: no fail log available" fi done return 1 fi ## ## Mail ## if [ "${#msg[@]}" != 0 ]; then cat <