
new: [rsync-backup-target] manage sqlite database of log chunks for stats

rsync-stats · Valentin Lab · 3 weeks ago
commit c5b32875cc

Changed files (lines changed):
  17    rsync-backup-target/build/Dockerfile
  108   rsync-backup-target/build/src/usr/local/sbin/import-log-chunks
  75    rsync-backup-target/hooks/init
  8     rsync-backup-target/metadata.yml

rsync-backup-target/build/Dockerfile (17 lines changed)

@@ -1,11 +1,12 @@
-FROM alpine:3.9
+FROM alpine:3.20
 MAINTAINER Valentin Lab <valentin.lab@kalysto.org>
 ## coreutils is for ``date`` support of ``--rfc-3339=seconds`` argument.
 ## findutils is for ``find`` support of ``--newermt`` argument.
 ## gawk is for ``awk`` support of unicode strings.
-RUN apk add rsync sudo bash openssh-server coreutils findutils gawk
+## btrfs-progs is for ``btrfs`` support for snapshotting capacity
+RUN apk add rsync sudo bash openssh-server coreutils findutils gawk btrfs-progs
 RUN ssh-keygen -A
 ## New user/group rsync/rsync with home dir in /var/lib/rsync
@@ -27,6 +28,18 @@ RUN chmod 440 /etc/sudoers.d/*
 RUN mkdir /var/run/sshd

+ENV SCRIPT_LOGCHUNK_SHA="0.1.0"
+
+RUN apk add curl; export pkg ; \
+    for pkg in logchunk; do \
+        echo "Getting $pkg..." ; \
+        bash -c -- 'varname=${pkg^^} ; varname=${varname//-/_} ; \
+            eval curl https://docker.0k.io/downloads/$pkg-\${SCRIPT_${varname^^}_SHA}' > \
+            /usr/local/bin/"$pkg" || exit 1 ; \
+        chmod +x /usr/local/bin/"$pkg" ; \
+    done
+
 COPY ./entrypoint.sh /entrypoint.sh
 EXPOSE 22
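The download step above derives the pin variable name from the package name with bash case conversion plus an eval for the indirection. A minimal standalone sketch of that expansion (values taken from the diff above; bash >= 4 assumed):

    pkg=logchunk
    SCRIPT_LOGCHUNK_SHA="0.1.0"              # matches the ENV line above
    varname=${pkg^^}                         # -> LOGCHUNK
    varname=${varname//-/_}                  # dashes become underscores (for names like "log-chunk")
    eval echo "https://docker.0k.io/downloads/$pkg-\${SCRIPT_${varname}_SHA}"
    # -> https://docker.0k.io/downloads/logchunk-0.1.0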

rsync-backup-target/build/src/usr/local/sbin/import-log-chunks (108 lines changed)

@@ -0,0 +1,108 @@
+#!/bin/bash
+
+RSYNC_LOG_PATH="${RSYNC_LOG_PATH:-/var/log/rsync}"
+RSYNC_DB_FILE="${RSYNC_DB_FILE:-$RSYNC_LOG_PATH/logchunks.db}"
+RSYNC_FAILED_CHUNKS_PATH="${RSYNC_FAILED_CHUNKS_PATH:-$RSYNC_LOG_PATH/failed_chunks}"
+
+is_btrfs_subvolume() {
+    local dir=$1
+    [ "$(stat -f --format="%T" "$dir")" == "btrfs" ] || return 1
+    inode="$(stat --format="%i" "$dir")"
+    case "$inode" in
+        2|256)
+            return 0;;
+        *)
+            return 1;;
+    esac
+}
+
+time_now() { date +%s.%3N; }
+time_elapsed() { echo "scale=3; $2 - $1" | bc; }
+
+if ! [ -d "$RSYNC_LOG_PATH" ]; then
+    echo "Error: RSYNC_LOG_PATH is not a directory: $RSYNC_LOG_PATH" >&2
+    exit 1
+fi
+
+if ! is_btrfs_subvolume "$RSYNC_LOG_PATH"; then
+    echo "Error: RSYNC_LOG_PATH is not a Btrfs subvolume: $RSYNC_LOG_PATH" >&2
+    exit 1
+fi
+
+for cmd in btrfs logchunk; do
+    if ! type -p "$cmd" >/dev/null; then
+        echo "Error: $cmd command not found" >&2
+        exit 1
+    fi
+done
+
+if ! [ -d "$RSYNC_FAILED_CHUNKS_PATH" ]; then
+    mkdir -p "$RSYNC_FAILED_CHUNKS_PATH" || {
+        echo "Error: Failed to create RSYNC_FAILED_CHUNKS_PATH directory: $RSYNC_FAILED_CHUNKS_PATH" >&2
+        exit 1
+    }
+fi
+
+rsync_log_work_dir="${RSYNC_LOG_PATH}.logchunk"
+if [ -e "$rsync_log_work_dir" ]; then
+    echo "Error: RSYNC_LOG_PATH work directory already exists: $rsync_log_work_dir" >&2
+    exit 1
+fi
+
+btrfs subvolume snapshot -r "$RSYNC_LOG_PATH" "$rsync_log_work_dir" || {
+    echo "Error: Failed to create snapshot of RSYNC_LOG_PATH" >&2
+    exit 1
+}
+trap "btrfs subvolume delete '$rsync_log_work_dir'" EXIT
+
+start=$(time_now)
+for log_file in "$rsync_log_work_dir"/target_*_rsync.log; do
+    ident="${log_file##*/}"
+    ident="${ident#target_}"
+    ident="${ident%_rsync.log}"
+    errors=0
+    chunks=0
+    start_ident=$(time_now)
+    start_log_line="${start_ident%.*}"
+    echo "$ident:"
+    last_chunk_count=0
+    last_error_count=0
+    while true; do
+        logchunk next -c logchunk "$log_file" |
+            logchunk import "${RSYNC_DB_FILE}" "$ident" "$RSYNC_FAILED_CHUNKS_PATH" 2>&1 |
+            sed -r "s/^/ | /"
+        pipe_status=("${PIPESTATUS[@]}")
+        if [ "${pipe_status[0]}" == 1 ]; then
+            ## no new chunks
+            break
+        fi
+        if [ "${pipe_status[0]}" == 127 ]; then
+            echo "Error: fatal !" >&2
+            exit 1
+        fi
+        errlvl="${pipe_status[1]}"
+        if [ "$errlvl" != 0 ]; then
+            errors=$((errors + 1))
+        fi
+        chunks=$((chunks + 1))
+        now=$(time_now)
+        now="${now%.*}"
+        if [ $((now - start_log_line)) -gt 15 ]; then
+            rate=$(echo "scale=2; ($chunks - $last_chunk_count) / ($now - $start_log_line)" | bc)
+            echo " |~ processed $((chunks - last_chunk_count)) chunks with $((errors - last_error_count)) errors ($rate chunks/s)"
+            start_log_line="$now"
+            last_chunk_count=$chunks
+            last_error_count=$errors
+        fi
+    done
+    if [ "$chunks" != 0 ]; then
+        elapsed_ident="$(time_elapsed "$start_ident" "$(time_now)")" || exit 1
+        echo " .. processed $chunks chunks with $errors errors in ${elapsed_ident}s"
+    fi
+done
+
+elapsed="$(time_elapsed "$start" "$(time_now)")" || exit 1
+echo "Processed all logs in ${elapsed}s"

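The import loop keys its control flow off bash's PIPESTATUS array rather than $?, which only reflects the last command of a pipeline (sed here). A runnable sketch of the pattern (the subshells stand in for the two logchunk commands; exit codes are illustrative):

    #!/bin/bash
    ( exit 3 ) | ( exit 0 )              # producer | consumer
    pipe_status=("${PIPESTATUS[@]}")     # copy immediately: the next command overwrites PIPESTATUS
    echo "producer: ${pipe_status[0]}, consumer: ${pipe_status[1]}"
    # -> producer: 3, consumer: 0

In the script, status 1 from "logchunk next" means no new chunks and ends the loop, 127 aborts the run, and a non-zero status from "logchunk import" only increments the error counter.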
rsync-backup-target/hooks/init (75 lines changed)

@@ -55,6 +55,19 @@ rebuild-config() {
     e "$control_users" > "$CONTROL_USERS_FILE"
 }
+
+is_btrfs_subvolume() {
+    local dir=$1
+    [ "$(stat -f --format="%T" "$dir")" == "btrfs" ] || return 1
+    inode="$(stat --format="%i" "$dir")"
+    case "$inode" in
+        2|256)
+            return 0;;
+        *)
+            return 1;;
+    esac
+}
+
 local_path_key=/etc/rsync/keys/admin
 host_path_key="$SERVICE_CONFIGSTORE${local_path_key}"

@@ -63,12 +76,74 @@ CONTROL_USERS_FILE="$SERVICE_DATASTORE/.control-pass"
 ## Was it already properly propagated to database ?
 control_users=$(H "${admin_keys}" "$(declare -f "rebuild-config")")

+if ! out=$(stat -f -c %T "$SERVICE_DATASTORE"/var/log 2>&1); then
+    err "Command 'stat' failed with error:"
+    echo "$out" | prefix "  ${GRAY}|${NORMAL} " >&2
+    exit 1
+fi
+compose_fragments=""
+if [ "$out" == "btrfs" ]; then
+    ## We'll need to add SYS_ADMIN capability to the container to
+    ## allow it to delete snapshots
+    compose_fragments+="\
+  cap_add:
+    - SYS_ADMIN
+"
+    RSYNC_LOG_PATH="$SERVICE_DATASTORE/var/log/rsync"
+    if ! is_btrfs_subvolume "$RSYNC_LOG_PATH"; then
+        previous_contents=
+        if [ -e "$RSYNC_LOG_PATH" ]; then
+            previous_contents=1
+            info "Directory '$RSYNC_LOG_PATH' exists but is not a btrfs subvolume."
+            ## we want to keep the data, so we'll move it to a temporary location
+            mv "$RSYNC_LOG_PATH" "${RSYNC_LOG_PATH}.bak"
+        fi
+        if ! out=$(btrfs subvolume create "$RSYNC_LOG_PATH" 2>&1); then
+            err "Command 'btrfs subvolume create' failed with error:"
+            echo "$out" | prefix "  ${GRAY}|${NORMAL} " >&2
+            if [ -n "$previous_contents" ]; then
+                info "Restoring previous contents of '$RSYNC_LOG_PATH'"
+                mv "${RSYNC_LOG_PATH}.bak" "$RSYNC_LOG_PATH" || exit 1
+            fi
+            exit 1
+        fi
+        if [ -n "$previous_contents" ]; then
+            info "Moving previous contents of '$RSYNC_LOG_PATH' into the new subvolume."
+            (
+                ## avoid sending our env to find,
+                ## to prevent any "The environment is too large for exec" error
+                env -i PATH=/bin:/usr/bin \
+                    find "${RSYNC_LOG_PATH}.bak" \
+                        -mindepth 1 -maxdepth 1 \
+                        -exec cp -a {} "$RSYNC_LOG_PATH/" \;
+            ) && rm -rf "${RSYNC_LOG_PATH}.bak" || {
+                err "Failed to copy previous contents of '$RSYNC_LOG_PATH' into the new subvolume."
+                rmdir "$RSYNC_LOG_PATH" || {
+                    err "Failed to delete the newly created subvolume."
+                    echo "  Couldn't restore previous state !!" >&2
+                    exit 1
+                }
+                mv "${RSYNC_LOG_PATH}.bak" "$RSYNC_LOG_PATH" || {
+                    err "Failed to restore previous contents of '$RSYNC_LOG_PATH'."
+                    echo "  Couldn't restore previous state !!" >&2
+                    exit 1
+                }
+                exit 1
+            }
+        fi
+    fi
+fi
+
 init-config-add "\
 $SERVICE_NAME:
   volumes:
     - $host_path_key:$local_path_key
   labels:
     - compose.config_hash=$control_users
+$compose_fragments
 "

 if [ -e "$CONTROL_USERS_FILE" ] && [ "$control_users" == "$(cat "$CONTROL_USERS_FILE")" ]; then

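The is_btrfs_subvolume helper added above relies on a btrfs convention: the root directory of a subvolume carries inode number 256 (the script also accepts inode 2, which shows up in some edge cases), so a plain stat can answer the question without calling btrfs itself. A quick manual check of that invariant (paths are hypothetical; assumes a btrfs filesystem mounted at /mnt/data):

    stat -f --format=%T /mnt/data            # -> btrfs  (filesystem type)
    btrfs subvolume create /mnt/data/logs
    stat --format=%i /mnt/data/logs          # -> 256    (subvolume root inode)
    mkdir /mnt/data/plain
    stat --format=%i /mnt/data/plain         # ordinary inode number, not 256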
rsync-backup-target/metadata.yml (8 lines changed)

@@ -3,6 +3,7 @@ data-resources:
   - /etc/rsync/keys
   - /var/mirror
   - /var/log/rsync
+  - /var/spool/logchunk

 uses:
   log-rotate:

@@ -13,3 +14,10 @@ uses:
     solves:
       unmanaged-logs: "in docker logs"
     #default-options:
+  schedule-command:
+    constraint: required
+    auto: summon
+    solves:
+      missing-feature: "import log chunk in database"
+    default-options: !var-expand
+      (0 * * * *) {-p 10 -D} dc exec -T "$MASTER_BASE_SERVICE_NAME" import-log-chunks
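The default-options value above reads as a standard five-field cron expression followed by scheduler flags: run once an hour, at minute 0. A rough crontab-style equivalent (assuming standard cron semantics; the "-p 10 -D" flags and the dc wrapper are charm-specific and kept verbatim):

    0 * * * *  dc exec -T "$MASTER_BASE_SERVICE_NAME" import-log-chunks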