Files
Docker-Backup-Script/docker-recover.sh
T

348 lines
11 KiB
Bash

#!/bin/bash
#
# Docker services recovery script
# Restores /opt/<service> directories from an Internxt backup via rclone
#
# Usage:
# ./docker-recover.sh # interactive: lists backups and prompts
# ./docker-recover.sh latest # restore the most recent backup
# ./docker-recover.sh <archive-name> # restore a specific archive
# ./docker-recover.sh --dry-run [target] # show what would happen without changing anything
#
# Special handling for Netbird: the management database is restored into the
# Docker named volume by creating the container without starting it, copying
# the data in via `docker compose cp`, then bringing the stack up.
#
# Strict mode: stop on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ---- Configuration ----
# Services to restore; each one lives in ${TARGET_DIR}/<service>.
SERVICES=(
  netbird
  pocket-id
  caddy
  vaultwarden
)
TARGET_DIR=/opt

# rclone remote name and the folder on it that holds the backup archives.
RCLONE_REMOTE=internxt
RCLONE_PATH=vps-backups

# Local staging area for the downloaded archive and its extraction.
LOCAL_TMP=/tmp/docker-recovery
ARCHIVE_PREFIX=strato-docker # matches your renamed tarball
LOG_FILE=/var/log/docker-recover.log

# Netbird-specific
NETBIRD_DIR="${TARGET_DIR}/netbird"
NETBIRD_DB_PATH_IN_CONTAINER=/var/lib/netbird
NETBIRD_DB_STAGING_SUBDIR=_netbird-db
# ---- Parse args ----
# Flags may appear anywhere on the command line. Every non-flag word is
# collected in POSITIONAL and becomes the new "$@" once parsing is done.
DRY_RUN=0
POSITIONAL=()
while (( $# > 0 )); do
  if [[ "$1" == "--dry-run" || "$1" == "-n" ]]; then
    DRY_RUN=1
    shift
  elif [[ "$1" == "-h" || "$1" == "--help" ]]; then
    # The help text is the comment header of this file with "# " stripped.
    sed -n '2,15p' "$0" | sed 's/^# \{0,1\}//'
    exit 0
  elif [[ "$1" == "--" ]]; then
    # Explicit end of options: keep everything after it verbatim.
    shift
    POSITIONAL+=("$@")
    break
  elif [[ "$1" == -* ]]; then
    echo "Unknown option: $1" >&2
    exit 1
  else
    POSITIONAL+=("$1")
    shift
  fi
done
# The ":-" fallback keeps this safe under `set -u` when POSITIONAL is empty;
# in that case "$1" ends up as a single empty string.
set -- "${POSITIONAL[@]:-}"
# ---- Setup ----
# Ensure the local staging directory exists. This is a plain mkdir (not wrapped
# in run), so the directory is created in dry-run mode as well.
mkdir -p "$LOCAL_TMP"
# Print a timestamped message on stdout and append the same line to $LOG_FILE.
# In dry-run mode (DRY_RUN=1) the message is prefixed with "[DRY-RUN] ".
#
# Globals:   DRY_RUN (read), LOG_FILE (read), LOG_FILE_WARNED (written)
# Arguments: the message; multiple words are joined with spaces
# Returns:   always 0
#
# Writing to the log file is best-effort. If the file cannot be written (for
# example when the script is started without root), the message is still
# printed and a single warning goes to stderr. Failing here would abort the
# whole script under `set -e` before the caller can report the real problem.
log() {
  local prefix=""
  local line
  [[ $DRY_RUN -eq 1 ]] && prefix="[DRY-RUN] "
  line="[$(date '+%Y-%m-%d %H:%M:%S')] ${prefix}$*"
  printf '%s\n' "$line"
  # The braces let 2>/dev/null also swallow the shell's own "cannot open"
  # message when the redirection itself fails.
  if ! { printf '%s\n' "$line" >>"$LOG_FILE"; } 2>/dev/null; then
    if [[ -z "${LOG_FILE_WARNED:-}" ]]; then
      LOG_FILE_WARNED=1
      printf 'WARNING: cannot write to log file %s; logging to console only\n' \
        "$LOG_FILE" >&2
    fi
  fi
  return 0
}
# Report a fatal error and terminate the script with exit status 1.
#
# Arguments: the error message; multiple words are joined with spaces
#
# The message is routed through log (so it is timestamped and appended to the
# log file), with the console copy redirected to stderr. This matches the
# "Unknown option" diagnostic in the argument parser and keeps error text out
# of captured stdout.
err() {
  log "ERROR: $*" >&2
  exit 1
}
# Ask a yes/no question and report the answer through the exit status.
#
# Arguments: $1 - prompt text (" [y/N] " is appended)
# Returns:   0 if the answer is exactly "y" or "Y", non-zero otherwise.
#            In dry-run mode nothing is asked and the answer is always yes.
confirm() {
  local prompt="$1"
  # Local so the answer does not leak into the script's global scope.
  local response=""
  if [[ $DRY_RUN -eq 1 ]]; then
    log "(dry-run) would prompt: ${prompt} -- assuming YES"
    return 0
  fi
  # read returns non-zero on EOF. Whatever was read still decides the answer,
  # so the status is ignored instead of tripping `set -e`.
  read -r -p "$prompt [y/N] " response || true
  [[ "$response" =~ ^[Yy]$ ]]
}
# Execute the given command line, or only log it when DRY_RUN is 1.
# Meant for state-changing operations (writes, container ops, deletes,
# downloads) so that a dry run leaves the system untouched.
#
# Arguments: the command and its arguments
# Returns:   the command's exit status on a real run; log's status in dry-run
run() {
  if [[ $DRY_RUN -ne 1 ]]; then
    "$@"
    return
  fi
  log "WOULD RUN: $*"
}
# ---- Pre-flight checks ----
# Verify privileges, required tools and the rclone remote before touching
# anything. Each failed check aborts the script through err.
log "===== Recovery started ====="
if [[ $DRY_RUN -eq 1 ]]; then
  log "Running in dry-run mode — no changes will be made"
fi
if [[ $EUID -ne 0 ]]; then
  err "This script must be run as root (or with sudo)"
fi
if ! command -v rclone &>/dev/null; then
  err "rclone not found"
fi
if ! command -v docker &>/dev/null; then
  err "docker not found"
fi
# Capture the remote list before matching instead of piping straight into
# `grep -q`. Under `set -o pipefail`, grep exiting at the first match can make
# the producer fail on a closed pipe, which would be misreported as a missing
# remote. A failing `rclone listremotes` still ends in the same error below.
configured_remotes="$(rclone listremotes)" || configured_remotes=""
# -x matches whole lines and -F takes the name literally (no regex).
if ! grep -qxF -- "${RCLONE_REMOTE}:" <<<"$configured_remotes"; then
  err "rclone remote '${RCLONE_REMOTE}' not configured"
fi
# ---- Pick which archive to restore ----
# First positional argument: empty (interactive selection), "latest", or the
# name of a specific archive.
ARCHIVE_ARG="${1:-}"

# Print the backup archives stored on the remote, one file name per line, in
# sorted order. Callers treat the last line as the most recent backup.
#
# Globals: RCLONE_REMOTE, RCLONE_PATH, ARCHIVE_PREFIX (all read)
# Outputs: matching file names on stdout; rclone diagnostics on stderr
#
# rclone's stderr is deliberately NOT discarded. Otherwise an authentication
# or network failure is indistinguishable from "no backups exist".
list_remote_archives() {
  rclone lsf "${RCLONE_REMOTE}:${RCLONE_PATH}/" \
    --include "${ARCHIVE_PREFIX}_*.tar.gz" \
    --files-only \
    | sort
}
# Resolve ARCHIVE_ARG into SELECTED_ARCHIVE:
#   empty    -> list the remote archives and prompt (a dry run picks the last)
#   "latest" -> last entry of the sorted remote listing
#   other    -> used verbatim as the archive name
if [[ -z "$ARCHIVE_ARG" ]]; then
  log "Available backups on ${RCLONE_REMOTE}:${RCLONE_PATH}/:"
  mapfile -t archives < <(list_remote_archives)
  if [[ ${#archives[@]} -eq 0 ]]; then
    err "No backups found matching ${ARCHIVE_PREFIX}_*.tar.gz"
  fi
  for i in "${!archives[@]}"; do
    printf " [%d] %s\n" "$((i+1))" "${archives[$i]}"
  done
  echo
  if [[ $DRY_RUN -eq 1 ]]; then
    SELECTED_ARCHIVE="${archives[-1]}"
    log "(dry-run) auto-selecting most recent: ${SELECTED_ARCHIVE}"
  else
    choice=""
    # On EOF read fails; fall through with whatever was read so the user gets
    # "Invalid selection" instead of a silent exit under `set -e`.
    read -r -p "Enter number to restore (or 'latest' for most recent): " choice || true
    if [[ "$choice" == "latest" ]]; then
      SELECTED_ARCHIVE="${archives[-1]}"
    # 10# forces base 10. Without it a leading zero means octal, so "010"
    # would pick entry 8 and "08" would raise an arithmetic error.
    elif [[ "$choice" =~ ^[0-9]+$ ]] \
      && (( 10#$choice >= 1 && 10#$choice <= ${#archives[@]} )); then
      SELECTED_ARCHIVE="${archives[$((10#$choice - 1))]}"
    else
      err "Invalid selection"
    fi
  fi
elif [[ "$ARCHIVE_ARG" == "latest" ]]; then
  mapfile -t archives < <(list_remote_archives)
  [[ ${#archives[@]} -gt 0 ]] || err "No backups found"
  SELECTED_ARCHIVE="${archives[-1]}"
else
  # A name given on the command line is not checked against the remote here.
  SELECTED_ARCHIVE="$ARCHIVE_ARG"
fi
log "Selected archive: ${SELECTED_ARCHIVE}"
# ---- Confirm destructive action ----
# Spell out what is about to happen and give the operator a last chance to
# back out. Declining ends the script with status 0.
echo
if [[ $DRY_RUN -ne 1 ]]; then
  printf '%s\n' \
    "WARNING: This will:" \
    " 1. Stop all running containers in: ${SERVICES[*]}" \
    " 2. Replace the contents of ${TARGET_DIR}/<service> for each service" \
    " 3. Restore Netbird's management database into its Docker volume"
else
  echo "DRY RUN: showing what would happen, but no changes will be made."
fi
echo
confirm "Continue?" || {
  log "Aborted by user"
  exit 0
}
# ---- Download archive ----
# Fetch the selected archive into the staging directory unless a copy is
# already there. An existing copy is only discarded on a real run, and only
# when the operator asks for a fresh download.
LOCAL_ARCHIVE="${LOCAL_TMP}/${SELECTED_ARCHIVE}"
if [[ -f "$LOCAL_ARCHIVE" ]]; then
  log "Archive already present locally: ${LOCAL_ARCHIVE}"
  if [[ $DRY_RUN -eq 0 ]]; then
    if confirm "Re-download from remote?"; then
      rm -f "$LOCAL_ARCHIVE"
    fi
  fi
fi
if [[ ! -f "$LOCAL_ARCHIVE" ]]; then
  log "Downloading ${SELECTED_ARCHIVE} from ${RCLONE_REMOTE}:${RCLONE_PATH}/"
  if [[ $DRY_RUN -ne 1 ]]; then
    rclone copy \
      "${RCLONE_REMOTE}:${RCLONE_PATH}/${SELECTED_ARCHIVE}" \
      "${LOCAL_TMP}/" \
      --log-file="$LOG_FILE" \
      --log-level INFO \
      --stats=30s
    # Make sure the file really arrived before continuing.
    if [[ ! -f "$LOCAL_ARCHIVE" ]]; then
      err "Download failed: ${LOCAL_ARCHIVE} not found"
    fi
  else
    log "WOULD RUN: rclone copy ${RCLONE_REMOTE}:${RCLONE_PATH}/${SELECTED_ARCHIVE} ${LOCAL_TMP}/"
    log "(dry-run) skipping download — extraction and restore steps will be described only"
  fi
fi
# ---- Extract to staging ----
# Three cases:
#   dry run, no local archive -> only describe the extraction
#   dry run, archive present  -> list its top-level entries, extract nothing
#   real run                  -> unpack into a fresh timestamped directory
EXTRACT_DIR="${LOCAL_TMP}/extract_$(date +%s)"
if [[ $DRY_RUN -eq 1 ]] && [[ ! -f "$LOCAL_ARCHIVE" ]]; then
  # Without a real archive there is nothing to inspect; describe the plan.
  log "WOULD: extract ${SELECTED_ARCHIVE} to ${EXTRACT_DIR}"
  log "(dry-run) cannot inspect archive contents because it wasn't downloaded"
  log "(dry-run) assuming archive contains: ${SERVICES[*]} and netbird/${NETBIRD_DB_STAGING_SUBDIR}"
  ARCHIVE_CONTENTS_KNOWN=0
elif [[ $DRY_RUN -eq 1 ]]; then
  run mkdir -p "$EXTRACT_DIR"
  log "Extracting archive to ${EXTRACT_DIR}"
  # The archive is available locally, so peek at it without unpacking.
  log "Archive top-level entries:"
  tar -tzf "$LOCAL_ARCHIVE" \
    | awk -F/ '{print $1}' \
    | sort -u \
    | sed 's/^/ /' \
    | tee -a "$LOG_FILE"
  ARCHIVE_CONTENTS_KNOWN=1
else
  run mkdir -p "$EXTRACT_DIR"
  log "Extracting archive to ${EXTRACT_DIR}"
  tar -xzf "$LOCAL_ARCHIVE" -C "$EXTRACT_DIR"
  ARCHIVE_CONTENTS_KNOWN=1
fi
# Sanity check: every service is expected as a top-level directory of the
# extracted archive. A missing one only produces a warning. Skipped in dry-run
# mode, where nothing has been extracted.
if [[ $DRY_RUN -eq 0 ]]; then
  for service in "${SERVICES[@]}"; do
    [[ -d "${EXTRACT_DIR}/${service}" ]] \
      || log "WARNING: ${service} not found in archive (skipping)"
  done
fi
# ---- Stop existing containers ----
# A service counts as deployed when its directory exists and holds a compose
# file. Failing to stop one is logged as a warning and does not abort the run.
for service in "${SERVICES[@]}"; do
  service_dir="${TARGET_DIR}/${service}"
  if [[ ! -d "$service_dir" ]] \
    || [[ ! -f "${service_dir}/docker-compose.yml" && ! -f "${service_dir}/compose.yml" ]]; then
    log "No existing ${service} deployment at ${service_dir} (nothing to stop)"
    continue
  fi
  log "Stopping existing ${service}..."
  if [[ $DRY_RUN -eq 1 ]]; then
    log "WOULD RUN: (cd ${service_dir} && docker compose down)"
    continue
  fi
  # Subshell keeps the directory change local to this one command.
  if ! (cd "$service_dir" && docker compose down); then
    log "WARNING: failed to stop ${service} (may not exist yet)"
  fi
done
# ---- Restore each service's bind-mount directory ----
# For every service: move the current directory aside (kept with a
# ".pre-recovery.<epoch>" suffix), then copy the extracted directory into place.
for service in "${SERVICES[@]}"; do
  src="${EXTRACT_DIR}/${service}"
  dest="${TARGET_DIR}/${service}"
  # On a real run, services missing from the archive are left untouched.
  # A dry run has no extracted files, so it describes every service.
  [[ $DRY_RUN -ne 0 || -d "$src" ]] || continue
  if [[ -d "$dest" ]]; then
    backup_aside="${dest}.pre-recovery.$(date +%s)"
    log "Moving existing ${dest} -> ${backup_aside}"
    run mv "$dest" "$backup_aside"
  fi
  log "Restoring ${service} -> ${dest}"
  # run either performs the copy or logs "WOULD RUN: cp -a <src> <dest>".
  run cp -a "$src" "$dest"
done
# ---- Restore Netbird database into named volume ----
# The archive carries the management database in a staging subdirectory of the
# restored Netbird directory. It is copied into the container's data path with
# `docker compose cp` while the containers exist but are not running, and the
# staging subdirectory is removed afterwards.
NETBIRD_DB_SRC="${TARGET_DIR}/netbird/${NETBIRD_DB_STAGING_SUBDIR}"
# In dry-run we don't have files in /opt yet, so describe what would happen
if [[ $DRY_RUN -eq 1 ]]; then
  log "WOULD: detect Netbird management container (netbird-server vs management)"
  log "WOULD RUN: (cd ${NETBIRD_DIR} && docker compose up --no-start)"
  log "WOULD RUN: (cd ${NETBIRD_DIR} && docker compose cp -a ${NETBIRD_DB_SRC}/. <container>:${NETBIRD_DB_PATH_IN_CONTAINER}/)"
  log "WOULD: remove staging subdir ${NETBIRD_DB_SRC}"
elif [[ -d "$NETBIRD_DB_SRC" ]]; then
  log "Found Netbird DB backup at ${NETBIRD_DB_SRC}"
  # NOTE: this changes the script's working directory for everything after.
  cd "$NETBIRD_DIR"
  # Read the service list once and match against the captured text. Piping
  # `docker compose config` straight into `grep -q` is racy under
  # `set -o pipefail`: grep exits at the first match, the producer can then
  # fail on a closed pipe, and the detection would wrongly come up empty and
  # skip the database restore. A failing `config` yields an empty list, which
  # ends in the "could not detect" warning below.
  netbird_services="$(docker compose config --services 2>/dev/null)" \
    || netbird_services=""
  NETBIRD_DB_CONTAINER=""
  if grep -qx "netbird-server" <<<"$netbird_services"; then
    NETBIRD_DB_CONTAINER="netbird-server"
  elif grep -qx "management" <<<"$netbird_services"; then
    NETBIRD_DB_CONTAINER="management"
  fi
  if [[ -z "$NETBIRD_DB_CONTAINER" ]]; then
    log "WARNING: could not detect Netbird management container; skipping DB restore"
  else
    log "Detected Netbird management container: ${NETBIRD_DB_CONTAINER}"
    log "Creating Netbird containers/volumes (without starting)..."
    docker compose up --no-start
    # The staging dir may hold the data directly or inside a "netbird" folder.
    # The trailing "/." copies the directory's contents, not the directory.
    if [[ -d "${NETBIRD_DB_SRC}/netbird" ]]; then
      DB_SOURCE="${NETBIRD_DB_SRC}/netbird/."
    else
      DB_SOURCE="${NETBIRD_DB_SRC}/."
    fi
    log "Copying database from ${DB_SOURCE} into ${NETBIRD_DB_CONTAINER}:${NETBIRD_DB_PATH_IN_CONTAINER}"
    docker compose cp -a "$DB_SOURCE" \
      "${NETBIRD_DB_CONTAINER}:${NETBIRD_DB_PATH_IN_CONTAINER}/"
    log "Cleaning up staging subdir from ${NETBIRD_DIR}"
    rm -rf "$NETBIRD_DB_SRC"
  fi
else
  log "No Netbird DB backup found in archive (${NETBIRD_DB_SRC} missing)"
fi
# ---- Bring everything up ----
# Start every service that has a directory containing a compose file. In
# dry-run mode the command is only described, for every configured service.
for service in "${SERVICES[@]}"; do
  service_dir="${TARGET_DIR}/${service}"
  if [[ $DRY_RUN -eq 1 ]]; then
    log "WOULD RUN: (cd ${service_dir} && docker compose up -d)"
  elif [[ -d "$service_dir" ]] \
    && [[ -f "${service_dir}/docker-compose.yml" || -f "${service_dir}/compose.yml" ]]; then
    log "Starting ${service}..."
    # Subshell keeps the directory change local. A failure here still aborts
    # the script under `set -e`.
    (cd "$service_dir" && docker compose up -d)
  fi
done
# ---- Cleanup ----
# Drop the extraction directory (if one was created) and print a closing
# summary that matches the mode the script ran in.
log "Cleaning up extraction directory"
[[ ! -d "$EXTRACT_DIR" ]] || run rm -rf "$EXTRACT_DIR"
if [[ $DRY_RUN -ne 1 ]]; then
  for summary_line in \
    "===== Recovery complete =====" \
    "" \
    "Next steps:" \
    " - Verify each service is running: docker ps" \
    " - Check service logs: cd /opt/<service> && docker compose logs" \
    " - If DNS has changed, update your domain records" \
    " - The pre-recovery state of /opt/<service> is preserved as" \
    " /opt/<service>.pre-recovery.<timestamp> (delete when satisfied)"; do
    log "$summary_line"
  done
else
  log "===== Dry-run complete ====="
  log "No changes were made. Re-run without --dry-run to actually restore."
fi