From c5b32875cc724d5b8d23c707d1cf79668315d844 Mon Sep 17 00:00:00 2001
From: Valentin Lab
Date: Thu, 5 Dec 2024 10:35:51 +0100
Subject: [PATCH] new: [rsync-backup-target] manage sqlite database of log chunks for stats

---
Review notes (free text between the "---" marker and the diffstat; it is
not part of the commit message):

 * build/Dockerfile: curl is called with -fsSL so that an HTTP error
   fails the build instead of saving the server's error page as
   /usr/local/bin/logchunk. The download is still not integrity-checked:
   SCRIPT_LOGCHUNK_SHA holds a version string ("0.1.0"), not a digest.
 * is_btrfs_subvolume (import-log-chunks and hooks/init): ``inode`` is
   now declared local instead of leaking into the global scope.
 * import-log-chunks: ``shopt -s nullglob`` makes the import loop a no-op
   when no target_*_rsync.log exists, instead of running logchunk on the
   literal glob pattern.
 * import-log-chunks: the EXIT trap is single-quoted, so the work
   directory path is expanded (properly quoted) when the trap fires.
 * NOTE(review): time_elapsed and the rate computation call ``bc``, which
   is not in the visible ``apk add`` list -- confirm the image ships it.
 * NOTE(review): the import loop only stops on exit status 1 or 127 of
   ``logchunk next``; confirm no other failure status can repeat forever.
 * NOTE(review): indentation inside the hunks was re-flowed during
   review; if context does not match the tree byte for byte, apply with
   ``git apply --ignore-whitespace``. The commit id and ``index`` blob
   ids are those of the original submission.

 rsync-backup-target/build/Dockerfile          |  17 ++-
 .../src/usr/local/sbin/import-log-chunks      | 108 ++++++++++++++++++
 rsync-backup-target/hooks/init                |  75 ++++++++++++
 rsync-backup-target/metadata.yml              |   8 ++
 4 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100755 rsync-backup-target/build/src/usr/local/sbin/import-log-chunks

diff --git a/rsync-backup-target/build/Dockerfile b/rsync-backup-target/build/Dockerfile
index e57911c..83377ed 100644
--- a/rsync-backup-target/build/Dockerfile
+++ b/rsync-backup-target/build/Dockerfile
@@ -1,11 +1,12 @@
-FROM alpine:3.9
+FROM alpine:3.20
 MAINTAINER Valentin Lab
 
 ## coreutils is for ``date`` support of ``--rfc-3339=seconds`` argument.
 ## findutils is for ``find`` support of ``--newermt`` argument.
 ## gawk is for ``awk`` support of unicode strings.
-RUN apk add rsync sudo bash openssh-server coreutils findutils gawk
+## btrfs-progs is for ``btrfs`` support for snapshotting capacity
+RUN apk add rsync sudo bash openssh-server coreutils findutils gawk btrfs-progs
 
 RUN ssh-keygen -A
 
 ## New user/group rsync/rsync with home dir in /var/lib/rsync
@@ -27,6 +28,18 @@ RUN chmod 440 /etc/sudoers.d/*
 
 RUN mkdir /var/run/sshd
 
+ENV SCRIPT_LOGCHUNK_SHA="0.1.0"
+
+RUN apk add curl; export pkg ; \
+    for pkg in logchunk; do \
+        echo "Getting $pkg..." ; \
+        bash -c -- 'varname=${pkg^^} ; varname=${varname//-/_} ; \
+        eval curl -fsSL https://docker.0k.io/downloads/$pkg-\${SCRIPT_${varname^^}_SHA}' > \
+        /usr/local/bin/"$pkg" || exit 1 ; \
+        chmod +x /usr/local/bin/"$pkg" ; \
+    done
+
+
 COPY ./entrypoint.sh /entrypoint.sh
 
 EXPOSE 22
diff --git a/rsync-backup-target/build/src/usr/local/sbin/import-log-chunks b/rsync-backup-target/build/src/usr/local/sbin/import-log-chunks
new file mode 100755
index 0000000..5607da8
--- /dev/null
+++ b/rsync-backup-target/build/src/usr/local/sbin/import-log-chunks
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+RSYNC_LOG_PATH="${RSYNC_LOG_PATH:-/var/log/rsync}"
+RSYNC_DB_FILE="${RSYNC_DB_FILE:-$RSYNC_LOG_PATH/logchunks.db}"
+RSYNC_FAILED_CHUNKS_PATH="${RSYNC_FAILED_CHUNKS_PATH:-$RSYNC_LOG_PATH/failed_chunks}"
+
+is_btrfs_subvolume() {
+    local dir=$1 inode
+    [ "$(stat -f --format="%T" "$dir")" == "btrfs" ] || return 1
+    inode="$(stat --format="%i" "$dir")"
+    case "$inode" in
+        2|256)
+            return 0;;
+        *)
+            return 1;;
+    esac
+}
+
+time_now() { date +%s.%3N; }
+time_elapsed() { echo "scale=3; $2 - $1" | bc; }
+
+shopt -s nullglob ## an unmatched target_*_rsync.log glob must expand to nothing
+
+if ! [ -d "$RSYNC_LOG_PATH" ]; then
+    echo "Error: RSYNC_LOG_PATH is not a directory: $RSYNC_LOG_PATH" >&2
+    exit 1
+fi
+
+if ! is_btrfs_subvolume "$RSYNC_LOG_PATH"; then
+    echo "Error: RSYNC_LOG_PATH is not a Btrfs subvolume: $RSYNC_LOG_PATH" >&2
+    exit 1
+fi
+
+for cmd in btrfs logchunk; do
+    if ! type -p "$cmd" >/dev/null; then
+        echo "Error: $cmd command not found" >&2
+        exit 1
+    fi
+done
+
+if ! [ -d "$RSYNC_FAILED_CHUNKS_PATH" ]; then
+    mkdir -p "$RSYNC_FAILED_CHUNKS_PATH" || {
+        echo "Error: Failed to create RSYNC_FAILED_CHUNKS_PATH directory: $RSYNC_FAILED_CHUNKS_PATH" >&2
+        exit 1
+    }
+fi
+
+rsync_log_work_dir="${RSYNC_LOG_PATH}.logchunk"
+if [ -e "$rsync_log_work_dir" ]; then
+    echo "Error: RSYNC_LOG_PATH work directory already exists: $rsync_log_work_dir" >&2
+    exit 1
+fi
+
+btrfs subvolume snapshot -r "$RSYNC_LOG_PATH" "$rsync_log_work_dir" || {
+    echo "Error: Failed to create snapshot of RSYNC_LOG_PATH" >&2
+    exit 1
+}
+trap 'btrfs subvolume delete "$rsync_log_work_dir"' EXIT
+
+start=$(time_now)
+
+for log_file in "$rsync_log_work_dir"/target_*_rsync.log; do
+    ident="${log_file##*/}"
+    ident="${ident#target_}"
+    ident="${ident%_rsync.log}"
+    errors=0
+    chunks=0
+    start_ident=$(time_now)
+    start_log_line="${start_ident%.*}"
+    echo "$ident:"
+    last_chunk_count=0
+    last_error_count=0
+    while true; do
+        logchunk next -c logchunk "$log_file" |
+            logchunk import "${RSYNC_DB_FILE}" "$ident" "$RSYNC_FAILED_CHUNKS_PATH" 2>&1 |
+            sed -r "s/^/ | /"
+        pipe_status=("${PIPESTATUS[@]}")
+        if [ "${pipe_status[0]}" == 1 ]; then
+            ## no new chunks
+            break
+        fi
+        if [ "${pipe_status[0]}" == 127 ]; then
+            echo "Error: fatal !" >&2
+            exit 1
+        fi
+        errlvl="${pipe_status[1]}"
+        if [ "$errlvl" != 0 ]; then
+            errors=$((errors + 1))
+        fi
+        chunks=$((chunks + 1))
+        now=$(time_now)
+        now="${now%.*}"
+        if [ $((now - start_log_line)) -gt 15 ]; then
+            rate=$(echo "scale=2; ($chunks - $last_chunk_count) / ($now - $start_log_line)" | bc)
+            echo " |~ processed $((chunks - last_chunk_count)) chunks with $((errors - last_error_count)) errors ($rate chunks/s)"
+            start_log_line="$now"
+            last_chunk_count=$chunks
+            last_error_count=$errors
+        fi
+    done
+    if [ "$chunks" != 0 ]; then
+        elapsed_ident="$(time_elapsed "$start_ident" "$(time_now)")" || exit 1
+        echo " .. processed $chunks chunks with $errors errors in ${elapsed_ident}s"
+    fi
+done
+
+elapsed="$(time_elapsed "$start" "$(time_now)")" || exit 1
+echo "Processed all logs in ${elapsed}s"
diff --git a/rsync-backup-target/hooks/init b/rsync-backup-target/hooks/init
index dc53ea6..338b3bc 100755
--- a/rsync-backup-target/hooks/init
+++ b/rsync-backup-target/hooks/init
@@ -55,6 +55,19 @@ rebuild-config() {
     e "$control_users" > "$CONTROL_USERS_FILE"
 }
 
+is_btrfs_subvolume() {
+    local dir=$1 inode
+    [ "$(stat -f --format="%T" "$dir")" == "btrfs" ] || return 1
+    inode="$(stat --format="%i" "$dir")"
+    case "$inode" in
+        2|256)
+            return 0;;
+        *)
+            return 1;;
+    esac
+}
+
+
 local_path_key=/etc/rsync/keys/admin
 host_path_key="$SERVICE_CONFIGSTORE${local_path_key}"
 
@@ -63,12 +76,74 @@ CONTROL_USERS_FILE="$SERVICE_DATASTORE/.control-pass"
 ## Was it already properly propagated to database ?
 control_users=$(H "${admin_keys}" "$(declare -f "rebuild-config")")
 
+if ! out=$(stat -f -c %T "$SERVICE_DATASTORE"/var/log 2>&1); then
+    err "Command 'stat' failed with error:"
+    echo "$out" | prefix " ${GRAY}|${NORMAL} " >&2
+    exit 1
+fi
+
+compose_fragments=""
+if [ "$out" == "btrfs" ]; then
+    ## We'll need to add SYS_ADMIN capability to the container to
+    ## allow it to delete snapshots
+    compose_fragments+="\
+  cap_add:
+    - SYS_ADMIN
+"
+
+    RSYNC_LOG_PATH="$SERVICE_DATASTORE/var/log/rsync"
+    if ! is_btrfs_subvolume "$RSYNC_LOG_PATH"; then
+        previous_contents=
+        if [ -e "$RSYNC_LOG_PATH" ]; then
+            previous_contents=1
+            info "Directory '$RSYNC_LOG_PATH' exists but is not a btrfs subvolume."
+            ## we want to keep the data, so we'll move it to a temporary location
+            mv "$RSYNC_LOG_PATH" "${RSYNC_LOG_PATH}.bak"
+        fi
+        if ! out=$(btrfs subvolume create "$RSYNC_LOG_PATH" 2>&1); then
+            err "Command 'btrfs subvolume create' failed with error:"
+            echo "$out" | prefix " ${GRAY}|${NORMAL} " >&2
+            if [ -n "$previous_contents" ]; then
+                info "Restoring previous contents of '$RSYNC_LOG_PATH'"
+                mv "${RSYNC_LOG_PATH}.bak" "$RSYNC_LOG_PATH" || exit 1
+            fi
+            exit 1
+        fi
+        if [ -n "$previous_contents" ]; then
+            info "Moving previous contents of '$RSYNC_LOG_PATH' into the new subvolume."
+            (
+                ## avoid sending our env to find,
+                ## to prevent any "The environment is too large for exec" error
+                env -i PATH=/bin:/usr/bin \
+                    find "${RSYNC_LOG_PATH}.bak" \
+                    -mindepth 1 -maxdepth 1 \
+                    -exec cp -a {} "$RSYNC_LOG_PATH/" \;
+            ) && rm -rf "${RSYNC_LOG_PATH}.bak" || {
+                err "Failed to copy previous contents of '$RSYNC_LOG_PATH' into the new subvolume."
+                rmdir "$RSYNC_LOG_PATH" || {
+                    err "Failed to delete the newly created subvolume."
+                    echo " Couldn't restore previous state !!" >&2
+                    exit 1
+                }
+                mv "${RSYNC_LOG_PATH}.bak" "$RSYNC_LOG_PATH" || {
+                    err "Failed to restore previous contents of '$RSYNC_LOG_PATH'."
+                    echo " Couldn't restore previous state !!" >&2
+                    exit 1
+                }
+                exit 1
+            }
+        fi
+    fi
+fi
+
+
 init-config-add "\
 $SERVICE_NAME:
   volumes:
     - $host_path_key:$local_path_key
   labels:
     - compose.config_hash=$control_users
+$compose_fragments
 "
 
 if [ -e "$CONTROL_USERS_FILE" ] && [ "$control_users" == "$(cat "$CONTROL_USERS_FILE")" ]; then
diff --git a/rsync-backup-target/metadata.yml b/rsync-backup-target/metadata.yml
index 1fb0a23..0362d5b 100644
--- a/rsync-backup-target/metadata.yml
+++ b/rsync-backup-target/metadata.yml
@@ -3,6 +3,7 @@ data-resources:
   - /etc/rsync/keys
   - /var/mirror
   - /var/log/rsync
+  - /var/spool/logchunk
 
 uses:
   log-rotate:
@@ -13,3 +14,10 @@ uses:
     solves:
       unmanaged-logs: "in docker logs"
     #default-options:
+  schedule-command:
+    constraint: required
+    auto: summon
+    solves:
+      missing-feature: "import log chunk in database"
+    default-options: !var-expand
+      (0 * * * *) {-p 10 -D} dc exec -T "$MASTER_BASE_SERVICE_NAME" import-log-chunks