#!/bin/bash

##
## Here's an example crontab:
##
##   SHELL=/bin/sh
##   PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
##
##   49 */2 * * *   root   mirror-dir run -d core-05.0k.io:10023 -u rsync /etc /home /opt/apps 2>&1 | logger -t mirror-dir
##


#:-
. /etc/shlib
#:-

include common
include parse
include process
include cmdline
include array

depends shyaml lock


##
## Functions
##


## Log file: ``log_tee``/``log_file`` append to it and
## ``mirror-dir:report`` parses it.
MIRROR_DIR_LOG=/var/log/mirror-dir.log
## Number of trailing lines of the log file that the report reads.
MIRROR_DIR_REPORT_MAX_READ_LINE=1000000
## Extended regex matching a timestamp such as ``2024-01-31 23:59:59+0100``
## (same layout as the ``%Y-%m-%d %H:%M:%S%z`` prefix added by the backup
## command).
R_DATE='[0-9]{4,4}-[01][0-9]-[0-3][0-9] [012][0-9]:[0-5][0-9]:[0-5][0-9][+-][01][0-9][0-5][0-9]'

##
## Print one line per completed rsync session found in the log file.
##
## Reads the last $MIRROR_DIR_REPORT_MAX_READ_LINE lines of
## $MIRROR_DIR_LOG, pairs each "Starting rsync" line with the "sent ..."
## statistics line that follows it, and prints: start date, destination
## host, source directory, bytes sent, bytes received, transfer rate
## (human readable through ``numfmt``) and the duration between both
## timestamps (formatted by ``print_duration``).
##
## Reads globals: MIRROR_DIR_LOG, MIRROR_DIR_REPORT_MAX_READ_LINE, R_DATE.
## Outputs: the report on stdout.
##
mirror-dir:report() {

    local s1 s2 d1 d2 host source sent received rate
    local s s_s d_s duration

    ## ``-r`` keeps backslashes of the parsed fields untouched.
    while read -r s1 s2 d1 d2 host source sent received rate; do
        s=$(date -d"$s1 $s2" --rfc-3339=seconds)
        s_s=$(date -d"$s1 $s2" +%s)
        d_s=$(date -d"$d1 $d2" +%s)
        duration=$((d_s - s_s))
        printf "%s %-15s %-30s | %s %s %s %10s\n" \
               "$s" "$host" "$source" "$sent" "$received" "$rate" "$(print_duration "$duration")"
    done < <(
        ## ``grep -E`` rather than the obsolescent ``egrep``, which
        ## emits a warning on stderr with recent GNU grep.
        tail -n "$MIRROR_DIR_REPORT_MAX_READ_LINE" "$MIRROR_DIR_LOG" |
            grep -E "^${R_DATE} (Starting|sent)" |
            ## Drop the trailing "(...)", turn "N bytes" into ":N",
            ## remove thousands separators, label the final rate.
            sed -r 's/\s*\(.*\)$//g
              s/ (([0-9]{1,3},)*[0-9]{1,3})(\.[0-9]{2,2})? bytes(\/sec)?/:\1/g
              s/,//g
              s/ :([0-9]+)$/ rate:\1/g' |
            grep -v "^--$" |
            ## Join each "Starting" line with the line following it.
            sed -r "/Starting/N;
              {s/\n(${R_DATE} )(.*)sent/ \1 sent/g}" |
            ## Reorder to: start-date end-date host source, then strip
            ## the "label:" prefixes and squeeze spaces.
            sed -r "s/^(${R_DATE} )Starting rsync: ([^ ]+) -> ([^ ]+) (${R_DATE} )/\1\4\3 \2/g
              s/ +/ /g
              s/ [a-z]+:/ /g" |
            ## Only keep fully formed records.
            grep -E "^${R_DATE} ${R_DATE} [^ ]+ /[^ ]+ [0-9]+ [0-9]+ [0-9]+$"
    ) |
        numfmt --field=6,7 --to=iec-i --suffix=B --padding=8 |
        numfmt --field=8 --to=iec-i --suffix=B/s --padding=10 |
        sed -r 's/ \| /  /g'
}


##
## Mirror every source directory to every destination with rsync.
##
## Arguments:
##   $1     store name; files land in /var/mirror/$1 on the destination
##   $2     space separated list of destinations, each being
##          HOST[:PORT][/BWLIMIT]
##   $3...  local source directories
##
## Reads globals: exname, user, RSYNC_OPTIONS, SSH_OPTIONS.
## Sets globals (kept global as before; ``get_ids`` reads state_dir):
##   dest_path, state_dir, rsync_options, ssh_options.
##
## For each (destination, source) pair a success or failure line is
## appended to "$state_dir/<session_id>-success" or "-fail". A failing
## rsync is tried 3 times at most; on a signal-like exit status the
## remaining sources of the current destination are skipped.
##
mirror-dir:run() {

    local hostname="$1" dests="$2" source_dirs
    shift 2

    local dest dest_for_session d dirpath dir session_id lock_label
    local tmp_exclude_patterns has_exclude_pattern exclude_dir
    local start retry errlvl interrupted
    local -a dest_rsync_options dest_ssh_options current_rsync_options cmd errlvls

    dests=($dests)          ## individual dests can't use any space-like separators
    source_dirs=("$@")

    dest_path=/var/mirror/$hostname
    state_dir=/var/run/mirror-dir
    mkdir -p "$state_dir"
    rsync_options=(
        ${RSYNC_OPTIONS:-} --stats --out-format='%i %o %f %l %b')
    ssh_options=(${SSH_OPTIONS:--o StrictHostKeyChecking=no})

    for dest in "${dests[@]}"; do
        dest_rsync_options=("${rsync_options[@]}")
        if [[ "$dest" == *"/"* ]]; then
            dest_rsync_options+=("--bwlimit" "${dest##*/}")
            dest="${dest%/*}"
        fi
        ## Session ids are computed from HOST[:PORT] (bandwidth limit
        ## removed, port kept).
        dest_for_session="$dest"

        ## Per-destination copy: appending ``-p`` to the shared
        ## ``ssh_options`` made the port of a destination leak into the
        ## ssh command line of all the following destinations.
        dest_ssh_options=("${ssh_options[@]}")
        if [[ "$dest" == *":"* ]]; then
            dest_ssh_options+=("-p" "${dest#*:}")
            dest="${dest%%:*}"
        fi

        for d in "${source_dirs[@]}"; do

            current_rsync_options=("${dest_rsync_options[@]}")

            session_id="$(echo "${dest_for_session}$d" | md5_compat)"
            session_id="${session_id:1:8}"

            ## Normalize the source path (no trailing slash).
            dirpath="$(dirname "$d")"
            if [ "$dirpath" == "/" ]; then
                dir="/$(basename "$d")"
            else
                dir="$dirpath/$(basename "$d")"
            fi

            [ -d "$dir" ] || {
                warn "ignoring '$dir' as it is not existing."
                continue
            }

            lock_label=$exname-$hostname-${session_id}

            ## Unpredictable name: this file is written as root, a fixed
            ## name in a world writable directory could be pre-created
            ## (or symlinked) by another user.
            tmp_exclude_patterns=$(
                mktemp "${TMPDIR:-/tmp}/${lock_label}.exclude_patterns.XXXXXX") || {
                warn "ignoring '$dir' as no temporary exclude file could be created."
                continue
            }
            ## Adding the base of the dir if required... seems necessary with
            ## the rsync option that replicate the full path.
            has_exclude_pattern=
            while read-0 exclude_dir; do
                if [ -z "$has_exclude_pattern" ]; then
                    echo "Adding exclude patterns for source '$dir':" >&2
                    has_exclude_pattern=1
                fi
                if [[ "$exclude_dir" == "/"* ]]; then
                    exclude_dir="$dir${exclude_dir}"
                fi
                echo "  - $exclude_dir" >&2
                p0 "$exclude_dir"
            done < <(get_exclude_patterns "$dir") > "$tmp_exclude_patterns"
            if [ -n "$has_exclude_pattern" ]; then
                current_rsync_options+=("-0" "--exclude-from"="$tmp_exclude_patterns")
            else
                echo "No exclude patterns for '$dir'."
            fi
            echo ---------------------------------
            ## ``mirror-dir:report`` parses this exact line.
            echo "Starting rsync: $d -> $dest ($(date))"
            cmd=(
                nice -n 15 \
                 rsync "${current_rsync_options[@]}" -azvARH \
                 -e "sudo -u $user ssh ${dest_ssh_options[*]}" \
                 --delete --delete-excluded \
                 --partial --partial-dir .rsync-partial \
                 --numeric-ids "$dir/" "$user@$dest":"$dest_path"
            )
            echo "${cmd[@]}"
            start="$SECONDS"
            retry=1
            errlvls=()
            interrupted=
            while true; do
                lock "$lock_label" -v -D -k -- "${cmd[@]}"
                errlvl="$?"
                case "$errlvl" in
                    20)  ## Received SIGUSR1, SIGINT
                        echo "!! Rsync received SIGUSR1 or SIGINT."
                        echo "   .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-fail" \
                                    "$dest $d $((SECONDS - start)) signal SIGUSR1, SIGINT or SIGHUP"
                        interrupted=1
                        break
                        ;;
                    137|143)  ## killed SIGKILL, SIGTERM
                        echo "!! Rsync received $(kill -l "$errlvl")"
                        echo "   .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-fail" \
                                    "$dest $d $((SECONDS - start)) signal: $(kill -l "$errlvl")"
                        interrupted=1
                        break
                        ;;
                    0)
                        echo "Rsync finished with success $d -> $dest in $((SECONDS - start))s"
                        append_trim "${state_dir}/${session_id}-success" \
                                    "$dest $d $((SECONDS - start)) OK"
                        break
                        ;;
                    *)
                        errlvls+=("$errlvl")
                        echo "!! Rsync failed with an errorlevel $errlvl after $((SECONDS - start))s since start."
                        if [ "$retry" -lt 3 ]; then
                            echo "!! Triggering a retry ($((++retry))/3)"
                            continue
                        else
                            echo "!! Tried 3 times, bailing out."
                            echo "   .. interruption of $d -> $dest after $((SECONDS - start))s"
                            append_trim "${state_dir}/${session_id}-fail" \
                                        "$dest $d $((SECONDS - start))" \
                                        "Failed after 3 retries (errorlevels: ${errlvls[*]})"
                            break
                        fi
                        ;;
                esac
            done
            ## The temporary file is always created, so always remove it
            ## (verbosely only when it was actually used, as before).
            if [ -n "$has_exclude_pattern" ]; then
                rm -fv "$tmp_exclude_patterns"
            else
                rm -f "$tmp_exclude_patterns"
            fi
            ## On interruption, skip the remaining sources of this
            ## destination (same scope as the former ``break 2``).
            if [ -n "$interrupted" ]; then
                break
            fi
        done
    done

}



##
## Output the NUL-separated exclude patterns configured for a directory.
##
## Arguments:
##   $1  source directory; its patterns are looked up under the key
##       "<dir>.exclude" of $config_file
##
## Reads globals: config_file.
## Returns: non-zero when the config file does not exist; otherwise the
## status of ``shyaml`` (whose error output is discarded, as a missing
## key is an expected case).
##
get_exclude_patterns() {
    local dir="$1"
    [ -e "$config_file" ] || return
    ## shyaml uses "." as key path separator: dots that are part of the
    ## directory name have to be escaped as "\.". (The former expansion
    ## had pattern and replacement swapped and escaped nothing.)
    shyaml get-values-0 "${dir//./\\.}.exclude" < "$config_file" 2>/dev/null
}

##
## Append a timestamped line to a file and keep only its last 5000 lines.
##
## Arguments:
##   $1     file to append to
##   $2...  words of the message, joined with spaces
##
## The trimmed content is first written to "<file>.tmp", which then
## replaces the file.
##
append_trim() {
    local logfile="$1" entry
    shift
    entry="$(date --rfc-3339=s) $*"
    e "$entry"$'\n' >> "$logfile" &&
        tail -n 5000 "$logfile" > "$logfile.tmp" &&
        mv "$logfile.tmp" "$logfile"
}


## Copy stdin to stdout while appending it to $MIRROR_DIR_LOG.
log_tee() {
    tee -a "$MIRROR_DIR_LOG"
}

## Append stdin to $MIRROR_DIR_LOG without echoing it.
log_file() {
    cat >> "$MIRROR_DIR_LOG"
}


##
## Print, one per line and without duplicates, the session ids that have
## a "<id>-fail" or "<id>-success" file in $state_dir.
##
## Reads globals: state_dir.
## Outputs nothing when no state file exists.
##
get_ids() {
    local file session_id
    local -A id_done=()
    for file in "$state_dir"/*{-fail,-success}; do
        ## A glob that matches nothing is left as is by bash: without
        ## this check an empty state dir would yield the id "*".
        [ -e "$file" ] || continue
        session_id=${file%-*}
        [ -n "${id_done["$session_id"]}" ] && continue
        id_done["$session_id"]=1
        echo "${session_id##*/}"
    done
}


##
## Send the list of source directories to stdout through
## ``array_values_to_stdin``.
##
## Arguments:
##   $@  source directories given on the command line; when there are
##       none and the config file exists, its ``default.sources`` values
##       are used instead
##
## Sets globals: config_file (to "/etc/$exname/config.yml").
##
mirror-dir:_get_sources() {
    local DIR=("$@")

    config_file="/etc/$exname/config.yml"

    if [ "${#DIR[@]}" == 0 ] && [ -e "$config_file" ]; then
        info "No source provided on command line," \
             "reading '$config_file' for default sources"
        ## NOTE(review): the configured values go through ``eval echo``
        ## and word splitting, so they are expanded by the shell; only
        ## safe as long as the config file is trusted.
        DIR=($(eval echo $(shyaml get-values default.sources < "$config_file")))
    fi

    array_values_to_stdin DIR
}


## Flag the case where this file is sourced instead of being executed.
[[ "${BASH_SOURCE[0]}" != "${0}" ]] && SOURCED=true

## Program metadata.
## NOTE(review): presumably read by the cmdline library -- confirm.
version=0.1
desc='Manage mirroring of local directory to distant hosts'
help=""



##
## Code
##



## Command line declaration.
## NOTE(review): ``cmdline.spec.gnu`` comes from the included ``cmdline``
## library; presumably it registers the main command, then the
## ``backup`` sub-command -- confirm.
cmdline.spec.gnu


cmdline.spec.gnu backup

## Handler for ``-d``/``--dest`` of the ``backup`` sub-command: collects
## each given value (``$1``) in the global ``dests`` array, so the
## option can be repeated.
cmdline.spec:backup:valued:-d,--dest:run() {
    dests+=("$1")
}

## Destinations collected by the ``-d`` handlers.
dests=()

##
## Body of the ``backup`` sub-command: checks its arguments, resolves
## the source directories, then runs ``mirror-dir:run`` with its output
## timestamped and sent to the log file.
##
## Reads: DIR, dests, opt_hostname, opt_user, opt_quiet.
## NOTE(review): DIR and the opt_* variables are presumably filled by the
## cmdline library from the declarations below -- confirm.
##
cmdline.spec::cmd:backup:run() {

#     usage="usage: $exname -d DEST1 [-d DEST2 [...]] [-u USER] [DIR1 [DIR2 ...]]

# Preserve as much as possible the source structure, keeping hard-links, acl,
# exact numerical uids and gids, and being able to resume in very large files.


# "

    ## The ``: :posarg:``/``: :optval:``/``: :optfla:`` statements are
    ## no-ops for the shell (``:`` builtin).
    ## NOTE(review): presumably the cmdline library reads them to know
    ## the arguments and their help text -- confirm.
    : :posarg: [DIR...]    'Local directories that should be mirrored
                            on destination(s).

	                        Examples: /etc /home /var/backups

                            If no directories are provided, the config
                            file root entries will be used all as
                            destination to copy.'

    : :optval: -d,--dest   'Can be repeated. Specifies host
                            destination towards which files will be
                            send. Note that you can specify port
                            number after a colon and a bandwidth limit
                            for rsync after a '/'.

        	                Examples: -d liszt.musicalta:10022
                                      -d 10.8.0.19/200'


    : :optval: -u,--user   "(default: 'rsync')

        	                Local AND destination system user to log
        	                as at both ends to transfer file.  This
        	                local user need to have a no password ssh
        	                login to it's own account on destination.
        	                This destination account should have full
        	                permissions access without passwd to write
        	                with rsync-server in the destination
        	                directory."

    : :optval: -h,--hostname "(default is taken of the hostname file)

                               Set the destination store, this is the
                               name of the directory where the files
                               will all directories will be copied.
                               Beware ! if 2 hosts use the same store,
                               this means they'll conflictingly update
                               the same destination directory.  Only
                               use this if you know what you are
                               doing."

    : :optfla: -q,--quiet   "Prevent output on stderr. Please note that
                             output is always written in log file."


    [ "$UID" != "0" ] && echo "You must be root." && exit 1

    ## Store name defaults to the output of ``hostname``.
    [ -n "$opt_hostname" ] || opt_hostname=$(hostname)

    ## Quiet mode only writes to the log file, otherwise output is also
    ## echoed.
    if [ -n "$opt_quiet" ]; then
        log_facility=log_file
    else
        log_facility=log_tee
    fi

    if [ -z "$opt_hostname" ]; then
        err "Couldn't figure a valid hostname. Please specify one with \`\`-h STORENAME\`\`."
        return 1
    fi

    user=${opt_user:-rsync}

    config_file="/etc/$exname/config.yml"

     ## The two stdout/stderr swaps make the stderr of
     ## ``mirror-dir:_get_sources`` go through the log facility (whose
     ## own output ends up on stderr), while its stdout, the list of
     ## sources, is what ``array_read-0`` receives.
     array_read-0 DIR < <(
         {
             {
                 mirror-dir:_get_sources "${DIR[@]}"
             } 3>&1 1>&2 2>&3  | "$log_facility"
         } 3>&1 1>&2 2>&3
     )

    ## NOTE(review): ``usage`` is only assigned in the commented-out
    ## text at the top of this function; unless it is set elsewhere the
    ## ``echo "$usage"`` statements print an empty line.
    if [ "${#DIR[@]}" == 0 ]; then
        err "You must specify at least one source directory to mirror" \
            "on command line (or in a config file)."
        echo "$usage" >&2
        exit 1
    fi
    info "Source directories are: ${DIR[@]}" 2>&1 | "$log_facility"

    if [ "${#dests[@]}" == 0 ]; then
        err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
        echo "$usage" >&2
        return 1
    fi

    ## XXXvlab: note that we use here a special version of awk supporting
    ## ``strftime``. This is only to prefix a date to the logs. Yes, we know
    ## about ``--out-format`` and its ``%t`` which would be ideal, but it
    ## doesn't output proper UTC time (it is system time, no timezone info).
    ## All destinations are passed as one space separated argument.
    mirror-dir:run "$opt_hostname" "${dests[*]}" "${DIR[@]}" 2>&1 |
        awk -W interactive '{ print strftime("%Y-%m-%d %H:%M:%S%z"), $0 }' |
        "$log_facility"

}



## ``report`` sub-command: prints the report built by
## ``mirror-dir:report``.
## NOTE(review): ``cmdline.spec.gnu report`` presumably registers the
## sub-command with the cmdline library -- confirm.
cmdline.spec.gnu report
cmdline.spec::cmd:report:run() {
    mirror-dir:report
}



## Handler for ``-d``/``--dest`` of the ``check`` sub-command: collects
## each given value (``$1``) in the global ``dests`` array, so the
## option can be repeated.
cmdline.spec:check:valued:-d,--dest:run() {
    dests+=("$1")
}

## NOTE(review): presumably registers the ``check`` sub-command with the
## cmdline library -- confirm.
cmdline.spec.gnu check
##
## Body of the ``check`` sub-command: for each (destination, source)
## pair, verifies that its "<session_id>-success" state file was
## modified within the given time spec. Stale or missing ones are
## printed, or mailed to MAIL_DESTS when ``-m`` is given.
##
## Reads: DIR, dests, opt_time_spec, opt_mail_alert.
## NOTE(review): DIR and the opt_* variables are presumably filled by the
## cmdline library from the declarations below -- confirm.
##
## Returns 0 when every pair is fresh, 1 after printing the stale pairs;
## in mail mode the function ends with the mail pipeline.
##
cmdline.spec::cmd:check:run() {

#         usage="usage: $exname -d DEST1 [-d DEST2 [...]] [DIR1 [DIR2 ...]]

# Checks that mirror-dir did it's job. Will send an email if not.
# "


    ## The ``: :posarg:``/``: :optval:``/``: :optfla:`` statements are
    ## no-ops for the shell (``:`` builtin).
    ## NOTE(review): presumably the cmdline library reads them to know
    ## the arguments and their help text -- confirm.
    : :posarg: [DIR...]    'Local directories that should be mirrored
                            on destination(s).

	                        Examples: /etc /home /var/backups

                            If no directories are provided, the config
                            file root entries will be used all as
                            destination to copy.'

    : :optval: -d,--dest   'Can be repeated. Specifies host
                            destination towards which files will be
                            send. Note that you can specify port
                            number after a colon and a bandwidth limit
                            for rsync after a '/'.

        	                Examples: -d liszt.musicalta:10022
                                      -d 10.8.0.19/200'

    : :optval: -n,--time-spec "Give a full English time spec about how
                               old the last full run of rsync should
                               be at most. Defaults to '12 hours'.

                               Examples: -n '12 hours'
                                         -n '1 day'"

    : :optfla: -m,--mail-alert "Send alert via email. This is intended to 
                                use in cron."


    [ "$UID" != "0" ] && echo "You must be root." && exit 1

    ## NOTE(review): ``usage`` is only assigned in the commented-out
    ## text at the top of this function; unless it is set elsewhere the
    ## ``echo "$usage"`` statements print an empty line.
    if [ "${#dests[@]}" == 0 ]; then
        err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
        echo "$usage" >&2
        return 1
    fi

    ## Mail mode needs the recipients, expected in the MAIL_DESTS array
    ## once the alerting defaults file is sourced.
    if [ -n "$opt_mail_alert" ]; then
        CHECK_DEFAULT_SOURCE=/etc/default/alerting
        [ -f "$CHECK_DEFAULT_SOURCE" ] && . "$CHECK_DEFAULT_SOURCE"

        if [ "${#MAIL_DESTS[@]}" == 0 ]; then
            echo "You must set at least one recipient destination for mails." >&2
            echo "  You can do that in '$CHECK_DEFAULT_SOURCE', using the variable" >&2
            echo "  '\$MAIL_DESTS'. Note this is a bash array variable." >&2
            exit 1
        fi
    fi

    array_read-0 DIR < <(mirror-dir:_get_sources "${DIR[@]}")

    if [ "${#DIR[@]}" == 0 ]; then
        err "You must specify at least one source directory to mirror" \
            "on command line (or in a config file)."
        echo "$usage" >&2
        exit 1
    fi

    time_spec="${opt_time_spec:-12 hours}"

    state_dir=/var/run/mirror-dir

    ## Getting max string length of source
    dir_max_len=0
    for d in "${DIR[@]}"; do
        [ "$dir_max_len" -lt "${#d}" ] &&
            dir_max_len="${#d}"
    done

    ## Getting max string length of dests
    dest_max_len=0
    for d in "${dests[@]}"; do
        [ "$dest_max_len" -lt "${#d}" ] &&
            dest_max_len="${#d}"
    done

    ## sessions: session id -> "dest dir"; bad_sessions: ids without a
    ## recent success; msg: one aligned line per bad session.
    declare -A sessions=()
    bad_sessions=()
    msg=()
    for dest in "${dests[@]}"; do
        ## Remove the bandwidth limit so that the session id matches the
        ## one computed by ``mirror-dir:run``.
        ## NOTE(review): ``current_rsync_options`` is not read anywhere
        ## in this function; looks like a leftover of ``mirror-dir:run``.
        if [[ "$dest" == *"/"* ]]; then
            current_rsync_options+=("--bwlimit" "${dest##*/}")
            dest="${dest%/*}"
        fi

        for d in "${DIR[@]}"; do
            session_id="$(echo "$dest$d" | md5_compat)"
            session_id="${session_id:1:8}"
            sessions["$session_id"]="$dest $d"
            ## Non-empty only if the success file was modified more
            ## recently than "$time_spec" ago.
            f=$(find "$state_dir" \
                     -maxdepth 1 -newermt "-$time_spec" \
                     -type f -name "${session_id}-success")
            if [ -z "$f" ]; then
                if [ -e "$state_dir/${session_id}-success" ]; then
                    ## Too old: report its modification time (nanoseconds
                    ## stripped).
                    msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s last full sync %s" \
                               "$dest" "$d" \
                               "$(stat -c %y "$state_dir/${session_id}-success" |
                                     sed -r 's/\.[0-9]{9,9} / /g')")")
                else
                    msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s never finished yet" \
                               "$dest" "$d")")
                fi
                bad_sessions+=("$session_id")
            fi
        done
    done

    ## Nothing to report.
    [ "${#msg[@]}" == 0 ] && return 0

    ## Console report.
    if [ -z "$opt_mail_alert" ]; then
        echo
        echo "${DARKRED}These destination/source directory were" \
             "last synced more than $time_spec ago:${NORMAL}"
        for m in "${msg[@]}"; do
            printf "  %s\n" "$m"
        done
        echo
        echo "${DARKRED}Last failed logs:${NORMAL}"
        for m in "${bad_sessions[@]}"; do
            if [ -e "${state_dir}"/$m-fail ]; then
                echo "  ${sessions[$m]}:"
                ## Last 5 failures; fields 3 and 4 (the dest and source
                ## written by ``mirror-dir:run``) are dropped as they
                ## were just printed above.
                tail -n 5 "${state_dir}"/$m-fail | cut -f 1,2,5- -d " " | sed -r "s/^/    /g"
                echo
            else
                echo "  ${sessions[$m]}: no fail log available"
            fi
        done
        return 1
    fi


    ##
    ## Mail
    ##

    ## Same content as the console report, sent to all MAIL_DESTS.
    if [ "${#msg[@]}" != 0 ]; then

        cat <<EOF | mail -s "[$(hostname)] mirror backup failing" "${MAIL_DESTS[@]}"
Hi,

  Some configured mirroring targets have not finished gracefully in
  the last $time_spec. Please see for yourself:

$(
    for m in "${msg[@]}"; do
        echo "    $m"
    done
)

  You might want to find these following information of some use:

$(

    for m in "${bad_sessions[@]}"; do
        if [ -e "${state_dir}"/$m-fail ]; then
           echo "    ${sessions[$m]}:"
           tail -n 5 "${state_dir}"/$m-fail | cut -f 1,2,5- -d " " | sed -r "s/^/      /g"
           echo
        else
           echo "    ${sessions[$m]}: no fail log available"
        fi
    done

)

  Hoping all this will help you sort out the issue...

  Yours sincerly,
--
mirror-dir-check

PS: You received this email because your email is listed in
\$MAIL_DESTS of '$CHECK_DEFAULT_SOURCE' of '$(hostname)'
(also known as $(cat /etc/mailname)).

EOF

    fi


}




## Hand the script arguments over to the cmdline library.
## NOTE(review): presumably this dispatches to the matching
## ``cmdline.spec::cmd:*:run`` function declared above -- confirm.
cmdline::parse "$@"