586 lines
18 KiB

  1. #!/bin/bash
  2. ##
  3. ## Here's an example crontab:
  4. ##
  5. ## SHELL=/bin/sh
  6. ## PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
  7. ##
  8. ## 49 */2 * * * root mirror-dir run -d core-05.0k.io:10023 -u rsync /etc /home /opt/apps 2>&1 | logger -t mirror-dir
  9. ##
  10. #:-
  11. . /etc/shlib
  12. #:-
  13. include common
  14. include parse
  15. include process
  16. include cmdline
  17. include array
  18. depends shyaml lock
  19. ##
  20. ## Functions
  21. ##
  22. MIRROR_DIR_LOG=/var/log/mirror-dir.log
  23. MIRROR_DIR_REPORT_MAX_READ_LINE=1000000
  24. R_DATE='[0-9]{4,4}-[01][0-9]-[0-3][0-9] [012][0-9]:[0-5][0-9]:[0-5][0-9][+-][01][0-9][0-5][0-9]'
  25. mirror-dir:report() {
  26. local s1 s2 s3 d1 d2 d3 host source sent received rate
  27. while read s1 s2 d1 d2 host source sent received rate; do
  28. s=$(date -d"$s1 $s2" --rfc-3339=seconds)
  29. s_s=$(date -d"$s1 $s2" +%s)
  30. d_s=$(date -d"$d1 $d2" +%s)
  31. duration=$((d_s - s_s))
  32. printf "%s %-15s %-30s | %s %s %s %10s\n" \
  33. "$s" "$host" "$source" "$sent" "$received" "$rate" "$(print_duration "$duration")"
  34. done < <(
  35. tail "$MIRROR_DIR_LOG" -n "$MIRROR_DIR_REPORT_MAX_READ_LINE" |
  36. egrep "^${R_DATE} (Starting|sent)" |
  37. sed -r 's/\s*\(.*\)$//g
  38. s/ (([0-9]{1,3},)*[0-9]{1,3})(\.[0-9]{2,2})? bytes(\/sec)?/:\1/g
  39. s/,//g
  40. s/ :([0-9]+)$/ rate:\1/g' |
  41. grep -v "^--$" |
  42. sed -r "/Starting/N;
  43. {s/\n(${R_DATE} )(.*)sent/ \1 sent/g}" |
  44. sed -r "s/^(${R_DATE} )Starting rsync: ([^ ]+) -> ([^ ]+) (${R_DATE} )/\1\4\3 \2/g
  45. s/ +/ /g
  46. s/ [a-z]+:/ /g" |
  47. egrep "^${R_DATE} ${R_DATE} [^ ]+ /[^ ]+ [0-9]+ [0-9]+ [0-9]+$"
  48. ) |
  49. numfmt --field=6,7 --to=iec-i --suffix=B --padding=8 |
  50. numfmt --field=8 --to=iec-i --suffix=B/s --padding=10 |
  51. sed -r 's/ \| / /g'
  52. }
  53. mirror-dir:run() {
  54. local hostname="$1" dests="$2" source_dirs
  55. shift 2
  56. dests=($dests) ## individual dests can't use any space-like separators
  57. source_dirs=("$@")
  58. dest_path=/var/mirror/$hostname
  59. state_dir=/var/run/mirror-dir
  60. mkdir -p "$state_dir"
  61. rsync_options=(
  62. ${RSYNC_OPTIONS:-} --stats --out-format='%i %o %f %l %b')
  63. ssh_options=(${SSH_OPTIONS:--o StrictHostKeyChecking=no})
  64. for dest in "${dests[@]}"; do
  65. dest_rsync_options=("${rsync_options[@]}")
  66. if [[ "$dest" == *"/"* ]]; then
  67. dest_rsync_options+=("--bwlimit" "${dest##*/}")
  68. dest="${dest%/*}"
  69. fi
  70. dest_for_session="$dest"
  71. for d in "${source_dirs[@]}"; do
  72. current_rsync_options=("${dest_rsync_options[@]}")
  73. session_id="$(echo "${dest_for_session}$d" | md5_compat)"
  74. session_id="${session_id:1:8}"
  75. if [[ "$dest" == *":"* ]]; then
  76. ssh_options+=("-p" "${dest#*:}")
  77. dest="${dest%%:*}"
  78. fi
  79. dirpath="$(dirname "$d")"
  80. if [ "$dirpath" == "/" ]; then
  81. dir="/$(basename "$d")"
  82. else
  83. dir="$dirpath/$(basename "$d")"
  84. fi
  85. [ -d "$dir" ] || {
  86. warn "ignoring '$dir' as it is not existing."
  87. continue
  88. }
  89. lock_label=$exname-$hostname-${session_id}
  90. tmp_exclude_patterns=/tmp/${lock_label}.exclude_patterns.tmp
  91. ## Adding the base of the dir if required... seems necessary with
  92. ## the rsync option that replicate the full path.
  93. has_exclude_pattern=
  94. while read-0 exclude_dir; do
  95. if [ -z "$has_exclude_pattern" ]; then
  96. echo "Adding exclude patterns for source '$dir':" >&2
  97. has_exclude_pattern=1
  98. fi
  99. if [[ "$exclude_dir" == "/"* ]]; then
  100. exclude_dir="$dir${exclude_dir}"
  101. fi
  102. echo " - $exclude_dir" >&2
  103. p0 "$exclude_dir"
  104. done < <(get_exclude_patterns "$dir") > "$tmp_exclude_patterns"
  105. if [ -n "$has_exclude_pattern" ]; then
  106. current_rsync_options+=("-0" "--exclude-from"="$tmp_exclude_patterns")
  107. else
  108. echo "No exclude patterns for '$dir'."
  109. fi
  110. echo ---------------------------------
  111. echo "Starting rsync: $d -> $dest ($(date))"
  112. cmd=(
  113. nice -n 15 \
  114. rsync "${current_rsync_options[@]}" -azvARH \
  115. -e "sudo -u $user ssh ${ssh_options[*]}" \
  116. --delete --delete-excluded \
  117. --partial --partial-dir .rsync-partial \
  118. --numeric-ids "$dir/" "$user@$dest":"$dest_path"
  119. )
  120. echo "${cmd[@]}"
  121. start="$SECONDS"
  122. retry=1
  123. errlvls=()
  124. while true; do
  125. lock "$lock_label" -v -D -k -- "${cmd[@]}"
  126. errlvl="$?"
  127. case "$errlvl" in
  128. 20) ## Received SIGUSR1, SIGINTT
  129. echo "!! Rsync received SIGUSR1 or SIGINT."
  130. echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
  131. append_trim "${state_dir}/${session_id}-fail" \
  132. "$dest $d $((SECONDS - start)) signal SIGUSR1, SIGINT or SIGHUP"
  133. break 2
  134. ;;
  135. 137|143) ## killed SIGKILL, SIGTERM
  136. echo "!! Rsync received $(kill -l "$errlvl")"
  137. echo " .. Full interruption while $d -> $dest and after $((SECONDS - start))s"
  138. append_trim "${state_dir}/${session_id}-fail" \
  139. "$dest $d $((SECONDS - start)) signal: $(kill -l "$errlvl")"
  140. break 2
  141. ;;
  142. 0)
  143. echo "Rsync finished with success $d -> $dest in $((SECONDS - start))s"
  144. append_trim "${state_dir}/${session_id}-success" \
  145. "$dest $d $((SECONDS - start)) OK"
  146. break
  147. ;;
  148. *)
  149. errlvls+=("$errlvl")
  150. echo "!! Rsync failed with an errorlevel $errlvl after $((SECONDS - start))s since start."
  151. if [ "$retry" -lt 3 ]; then
  152. echo "!! Triggering a retry ($((++retry))/3)"
  153. continue
  154. else
  155. echo "!! Tried 3 times, bailing out."
  156. echo " .. interruption of $d -> $dest after $((SECONDS - start))s"
  157. append_trim "${state_dir}/${session_id}-fail" \
  158. "$dest $d $((SECONDS - start))" \
  159. "Failed after 3 retries (errorlevels: ${errlvls[@]})"
  160. break
  161. fi
  162. ;;
  163. esac
  164. done
  165. if [ -n "$has_exclude_pattern" ]; then
  166. rm -fv "$tmp_exclude_patterns"
  167. fi
  168. done
  169. done
  170. }
  171. get_exclude_patterns() {
  172. local dir="$1"
  173. [ -e "$config_file" ] || return
  174. cat "$config_file" | shyaml get-values-0 "${dir//.\\./}.exclude" 2>/dev/null
  175. }
  176. append_trim() {
  177. local f="$1"
  178. shift
  179. e "$(date --rfc-3339=s) $*"$'\n' >> "$f" &&
  180. tail -n 5000 "$f" > "$f".tmp &&
  181. mv "$f"{.tmp,}
  182. }
  183. log_tee() { tee -a "$MIRROR_DIR_LOG"; }
  184. log_file() { cat >> "$MIRROR_DIR_LOG"; }
  185. get_ids() {
  186. local session_id id_done
  187. declare -A id_done
  188. for file in "$state_dir"/*{-fail,-success}; do
  189. session_id=${file%-*}
  190. [ "${id_done["$session_id"]}" ] && continue
  191. id_done["$session_id"]=1
  192. echo "${session_id##*/}"
  193. done
  194. }
  195. mirror-dir:_get_sources() {
  196. local DIR=("$@")
  197. config_file="/etc/$exname/config.yml"
  198. if [ "${#DIR[@]}" == 0 ]; then
  199. if [ -e "$config_file" ]; then
  200. info "No source provided on command line," \
  201. "reading '$config_file' for default sources"
  202. DIR=($(eval echo $(shyaml get-values default.sources < "$config_file")))
  203. fi
  204. fi
  205. array_values_to_stdin DIR
  206. }
  207. [[ "${BASH_SOURCE[0]}" != "${0}" ]] && SOURCED=true
  208. version=0.1
  209. desc='Manage mirroring of local directory to distant hosts'
  210. help=""
  211. ##
  212. ## Code
  213. ##
  214. cmdline.spec.gnu
  215. cmdline.spec.gnu backup
  216. cmdline.spec:backup:valued:-d,--dest:run() {
  217. dests+=("$1")
  218. }
  219. dests=()
  220. cmdline.spec::cmd:backup:run() {
  221. # usage="usage: $exname -d DEST1 [-d DEST2 [...]] [-u USER] [DIR1 [DIR2 ...]]
  222. # Preserve as much as possible the source structure, keeping hard-links, acl,
  223. # exact numerical uids and gids, and being able to resume in very large files.
  224. # "
  225. : :posarg: [DIR...] 'Local directories that should be mirrored
  226. on destination(s).
  227. Examples: /etc /home /var/backups
  228. If no directories are provided, the config
  229. file root entries will be used all as
  230. destination to copy.'
  231. : :optval: -d,--dest 'Can be repeated. Specifies host
  232. destination towards which files will be
  233. send. Note that you can specify port
  234. number after a colon and a bandwidth limit
  235. for rsync after a '/'.
  236. Examples: -d liszt.musicalta:10022
  237. -d 10.8.0.19/200'
  238. : :optval: -u,--user "(default: 'rsync')
  239. Local AND destination system user to log
  240. as at both ends to transfer file. This
  241. local user need to have a no password ssh
  242. login to it's own account on destination.
  243. This destination account should have full
  244. permissions access without passwd to write
  245. with rsync-server in the destination
  246. directory."
  247. : :optval: -h,--hostname "(default is taken of the hostname file)
  248. Set the destination store, this is the
  249. name of the directory where the files
  250. will all directories will be copied.
  251. Beware ! if 2 hosts use the same store,
  252. this means they'll conflictingly update
  253. the same destination directory. Only
  254. use this if you know what you are
  255. doing."
  256. : :optfla: -q,--quiet "Prevent output on stderr. Please note that
  257. output is always written in log file."
  258. [ "$UID" != "0" ] && echo "You must be root." && exit 1
  259. [ -n "$opt_hostname" ] || opt_hostname=$(hostname)
  260. if [ -n "$opt_quiet" ]; then
  261. log_facility=log_file
  262. else
  263. log_facility=log_tee
  264. fi
  265. if [ -z "$opt_hostname" ]; then
  266. err "Couldn't figure a valid hostname. Please specify one with \`\`-h STORENAME\`\`."
  267. return 1
  268. fi
  269. user=${opt_user:-rsync}
  270. config_file="/etc/$exname/config.yml"
  271. array_read-0 DIR < <(
  272. {
  273. {
  274. mirror-dir:_get_sources "${DIR[@]}"
  275. } 3>&1 1>&2 2>&3 | "$log_facility"
  276. } 3>&1 1>&2 2>&3
  277. )
  278. if [ "${#DIR[@]}" == 0 ]; then
  279. err "You must specify at least one source directory to mirror" \
  280. "on command line (or in a config file)."
  281. echo "$usage" >&2
  282. exit 1
  283. fi
  284. info "Source directories are: ${DIR[@]}" 2>&1 | "$log_facility"
  285. if [ "${#dests[@]}" == 0 ]; then
  286. err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
  287. echo "$usage" >&2
  288. return 1
  289. fi
  290. ## XXXvlab: note that we use here a special version of awk supporting
  291. ## ``strftime``. This is only to prefix a date to the logs. Yes, we know
  292. ## about ``--out-format`` and its ``%t`` which would be ideal, but it
  293. ## doesn't output proper UTC time (it is system time, no timezone info).
  294. mirror-dir:run "$opt_hostname" "${dests[*]}" "${DIR[@]}" 2>&1 |
  295. awk -W interactive '{ print strftime("%Y-%m-%d %H:%M:%S%z"), $0 }' |
  296. "$log_facility"
  297. }
  298. cmdline.spec.gnu report
  299. cmdline.spec::cmd:report:run() {
  300. mirror-dir:report
  301. }
  302. cmdline.spec:check:valued:-d,--dest:run() {
  303. dests+=("$1")
  304. }
  305. cmdline.spec.gnu check
  306. cmdline.spec::cmd:check:run() {
  307. # usage="usage: $exname -d DEST1 [-d DEST2 [...]] [DIR1 [DIR2 ...]]
  308. # Checks that mirror-dir did it's job. Will send an email if not.
  309. # "
  310. : :posarg: [DIR...] 'Local directories that should be mirrored
  311. on destination(s).
  312. Examples: /etc /home /var/backups
  313. If no directories are provided, the config
  314. file root entries will be used all as
  315. destination to copy.'
  316. : :optval: -d,--dest 'Can be repeated. Specifies host
  317. destination towards which files will be
  318. send. Note that you can specify port
  319. number after a colon and a bandwidth limit
  320. for rsync after a '/'.
  321. Examples: -d liszt.musicalta:10022
  322. -d 10.8.0.19/200'
  323. : :optval: -n,--time-spec "Give a full English time spec about how
  324. old the last full run of rsync should
  325. be at most. Defaults to '12 hours'.
  326. Examples: -n '12 hours'
  327. -n '1 day'"
  328. : :optfla: -m,--mail-alert "Send alert via email. This is intended to
  329. use in cron."
  330. [ "$UID" != "0" ] && echo "You must be root." && exit 1
  331. if [ "${#dests[@]}" == 0 ]; then
  332. err "You must specify at least a destination (using \`\`-d\`\` or \`\`--dest\`\`)."
  333. echo "$usage" >&2
  334. return 1
  335. fi
  336. if [ -n "$opt_mail_alert" ]; then
  337. CHECK_DEFAULT_SOURCE=/etc/default/alerting
  338. [ -f "$CHECK_DEFAULT_SOURCE" ] && . "$CHECK_DEFAULT_SOURCE"
  339. if [ "${#MAIL_DESTS[@]}" == 0 ]; then
  340. echo "You must set at least one recipient destination for mails." >&2
  341. echo " You can do that in '$CHECK_DEFAULT_SOURCE', using the variable" >&2
  342. echo " '\$MAIL_DESTS'. Note this is a bash array variable." >&2
  343. exit 1
  344. fi
  345. fi
  346. array_read-0 DIR < <(mirror-dir:_get_sources "${DIR[@]}")
  347. if [ "${#DIR[@]}" == 0 ]; then
  348. err "You must specify at least one source directory to mirror" \
  349. "on command line (or in a config file)."
  350. echo "$usage" >&2
  351. exit 1
  352. fi
  353. time_spec="${opt_time_spec:-12 hours}"
  354. state_dir=/var/run/mirror-dir
  355. ## Getting max string length of source
  356. dir_max_len=0
  357. for d in "${DIR[@]}"; do
  358. [ "$dir_max_len" -lt "${#d}" ] &&
  359. dir_max_len="${#d}"
  360. done
  361. ## Getting max string length of dests
  362. dest_max_len=0
  363. for d in "${dests[@]}"; do
  364. [ "$dest_max_len" -lt "${#d}" ] &&
  365. dest_max_len="${#d}"
  366. done
  367. declare -A sessions=()
  368. bad_sessions=()
  369. msg=()
  370. for dest in "${dests[@]}"; do
  371. if [[ "$dest" == *"/"* ]]; then
  372. current_rsync_options+=("--bwlimit" "${dest##*/}")
  373. dest="${dest%/*}"
  374. fi
  375. for d in "${DIR[@]}"; do
  376. session_id="$(echo "$dest$d" | md5_compat)"
  377. session_id="${session_id:1:8}"
  378. sessions["$session_id"]="$dest $d"
  379. f=$(find "$state_dir" \
  380. -maxdepth 1 -newermt "-$time_spec" \
  381. -type f -name "${session_id}-success")
  382. if [ -z "$f" ]; then
  383. if [ -e "$state_dir/${session_id}-success" ]; then
  384. msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s last full sync %s" \
  385. "$dest" "$d" \
  386. "$(stat -c %y "$state_dir/${session_id}-success" |
  387. sed -r 's/\.[0-9]{9,9} / /g')")")
  388. else
  389. msg+=("$(printf "%-${dest_max_len}s %-${dir_max_len}s never finished yet" \
  390. "$dest" "$d")")
  391. fi
  392. bad_sessions+=("$session_id")
  393. fi
  394. done
  395. done
  396. [ "${#msg[@]}" == 0 ] && return 0
  397. if [ -z "$opt_mail_alert" ]; then
  398. echo
  399. echo "${DARKRED}These destination/source directory were" \
  400. "last synced more than $time_spec ago:${NORMAL}"
  401. for m in "${msg[@]}"; do
  402. printf " %s\n" "$m"
  403. done
  404. echo
  405. echo "${DARKRED}Last failed logs:${NORMAL}"
  406. for m in "${bad_sessions[@]}"; do
  407. if [ -e "${state_dir}"/$m-fail ]; then
  408. echo " ${sessions[$m]}:"
  409. tail -n 5 "${state_dir}"/$m-fail | cut -f 1,2,5- -d " " | sed -r "s/^/ /g"
  410. echo
  411. else
  412. echo " ${sessions[$m]}: no fail log available"
  413. fi
  414. done
  415. return 1
  416. fi
  417. ##
  418. ## Mail
  419. ##
  420. if [ "${#msg[@]}" != 0 ]; then
  421. cat <<EOF | mail -s "[$(hostname)] mirror backup failing" "${MAIL_DESTS[@]}"
  422. Hi,
  423. Some configured mirroring targets have not finished gracefully in
  424. the last $time_spec. Please see for yourself:
  425. $(
  426. for m in "${msg[@]}"; do
  427. echo " $m"
  428. done
  429. )
  430. You might want to find these following information of some use:
  431. $(
  432. for m in "${bad_sessions[@]}"; do
  433. if [ -e "${state_dir}"/$m-fail ]; then
  434. echo " ${sessions[$m]}:"
  435. tail -n 5 "${state_dir}"/$m-fail | cut -f 1,2,5- -d " " | sed -r "s/^/ /g"
  436. echo
  437. else
  438. echo " ${sessions[$m]}: no fail log available"
  439. fi
  440. done
  441. )
  442. Hoping all this will help you sort out the issue...
  443. Yours sincerly,
  444. --
  445. mirror-dir-check
  446. PS: You received this email because your email is listed in
  447. \$MAIL_DESTS of '$CHECK_DEFAULT_SOURCE' of '$(hostname)'
  448. (also known as $(cat /etc/mailname)).
  449. EOF
  450. fi
  451. }
  452. cmdline::parse "$@"