#!/bin/bash
#
# Comprehensive Backup Target Discovery Script
#
# Probes a fixed list of hosts (ping + ssh) and records what would need to be
# backed up: Docker objects, systemd services, database containers, volumes,
# configuration files, secrets, network configuration and application data.
#
# Output:
#   $PROJECT_ROOT/comprehensive_discovery_results/   raw per-host result files
#   $PROJECT_ROOT/comprehensive_discovery_results/DISCOVERY_SUMMARY.md
#   $PROJECT_ROOT/logs/discovery_<timestamp>.log
#
# Remote commands are best-effort: a host that cannot be reached, or a tool
# that is missing on a host, produces an empty result file rather than
# aborting the run.
#
# NOTE: the results directory ends up containing container environment
# variables and lists of secret/key file paths. Treat it as sensitive.

set -euo pipefail

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
DISCOVERY_TIMESTAMP=$(date +%Y%m%d_%H%M%S)
DISCOVERY_DIR="$PROJECT_ROOT/comprehensive_discovery_results"
LOG_FILE="$PROJECT_ROOT/logs/discovery_${DISCOVERY_TIMESTAMP}.log"
readonly SCRIPT_DIR PROJECT_ROOT DISCOVERY_TIMESTAMP DISCOVERY_DIR LOG_FILE

# Options used for every ssh call.
#   -n                 keep ssh from reading stdin; without it ssh swallows the
#                      rest of the host list feeding the surrounding
#                      `while read` loop and only the first host is processed.
#   ConnectTimeout=5   do not stall on hosts that are offline.
readonly SSH_OPTS=(-n -o ConnectTimeout=5)

# Create directories
mkdir -p "$DISCOVERY_DIR" "$(dirname "$LOG_FILE")"

# Write a timestamped message to stdout and append it to $LOG_FILE.
# Arguments: message words
log() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"
}

# EXIT trap: remove scratch files from /tmp.
# NOTE(review): nothing in this script creates files matching these globs, so
# this may delete files owned by other tools - confirm it is still wanted.
cleanup() {
  log "Cleaning up temporary files..."
  rm -f /tmp/discovery_*.txt /tmp/docker_*.json /tmp/volume_*.txt 2>/dev/null || true
}

trap cleanup EXIT

# Run a command on a remote host and store its stdout in a file (best-effort).
# Arguments: $1 - ssh target (user@host)
#            $2 - local output file (truncated first)
#            $3 - command line to run on the remote host
# Returns:   always 0; remote failures leave an empty or partial file.
remote_capture() {
  local target=$1 outfile=$2 remote_cmd=$3
  ssh "${SSH_OPTS[@]}" "$target" "$remote_cmd" > "$outfile" 2>/dev/null || true
}

# Same as remote_capture, but appends to the output file.
remote_append() {
  local target=$1 outfile=$2 remote_cmd=$3
  ssh "${SSH_OPTS[@]}" "$target" "$remote_cmd" >> "$outfile" 2>/dev/null || true
}

# Run every discovery stage in order, then write the summary report.
main() {
  log "=== COMPREHENSIVE BACKUP TARGET DISCOVERY STARTED ==="
  log "Timestamp: $DISCOVERY_TIMESTAMP"
  log "Discovery directory: $DISCOVERY_DIR"

  # Discover all hosts in the infrastructure
  discover_hosts

  # Discover all Docker environments
  discover_docker_environments

  # Discover all systemd services (native services)
  discover_systemd_services

  # Discover all databases
  discover_databases

  # Discover all volumes and persistent data
  discover_volumes

  # Discover all configuration files
  discover_configurations

  # Discover all secrets and sensitive data
  discover_secrets

  # Discover all network configurations
  discover_network_configs

  # Discover all user data and applications
  discover_user_data

  # Discover all application-specific data
  discover_application_data

  # Generate comprehensive summary
  generate_discovery_summary

  log "=== DISCOVERY COMPLETE ==="
  log "Results saved to: $DISCOVERY_DIR"
}

# Write the known host list (host:user per line) to all_hosts.txt and record
# ping reachability of each host in host_status.txt (host:user:ONLINE|OFFLINE).
discover_hosts() {
  local host user

  log "=== DISCOVERING ALL HOSTS ==="

  # Create a list of known hosts with their correct usernames from inventory
  cat > "$DISCOVERY_DIR/all_hosts.txt" << 'EOF'
fedora:jonathan
omvbackup:jon
lenovo:jonathan
lenovo420:jon
omv800:root
surface:jon
audrey:jon
raspberrypi:jon
EOF

  # The results directory is reused between runs; start from an empty file so
  # repeated runs do not pile up duplicate rows.
  : > "$DISCOVERY_DIR/host_status.txt"

  # Check connectivity to each host
  while IFS=: read -r host user; do
    if [[ -n "$host" && -n "$user" ]]; then
      log "Checking connectivity to $host (user: $user)..."
      if ping -c 1 -W 2 "$host" > /dev/null 2>&1; then
        echo "$host:$user:ONLINE" >> "$DISCOVERY_DIR/host_status.txt"
      else
        echo "$host:$user:OFFLINE" >> "$DISCOVERY_DIR/host_status.txt"
      fi
    fi
  done < "$DISCOVERY_DIR/all_hosts.txt"

  # Also backup the inventory file
  if [[ -f "$PROJECT_ROOT/inventory.ini" ]]; then
    cp "$PROJECT_ROOT/inventory.ini" "$DISCOVERY_DIR/inventory_backup.txt"
  fi
}

# Probe every host for Docker over ssh. Writes docker_hosts.txt
# (host:user:DOCKER_AVAILABLE|NO_DOCKER), which the later stages use to pick
# the hosts they inspect, plus per-host info/container/image/network/volume
# listings.
discover_docker_environments() {
  local host user target

  log "=== DISCOVERING DOCKER ENVIRONMENTS ==="

  # Start from an empty list so hosts are not processed once per earlier run.
  : > "$DISCOVERY_DIR/docker_hosts.txt"

  # Check each host for Docker
  while IFS=: read -r host user; do
    if [[ -n "$host" && -n "$user" ]]; then
      target="$user@$host"
      log "Checking Docker on $host (user: $user)..."

      # Check if Docker is running.
      # NOTE(review): StrictHostKeyChecking=no accepts unknown host keys
      # without verification; kept because the first contact with a host
      # relies on it, but it weakens protection against host spoofing.
      if ssh "${SSH_OPTS[@]}" -o StrictHostKeyChecking=no "$target" "docker --version" 2>/dev/null; then
        echo "$host:$user:DOCKER_AVAILABLE" >> "$DISCOVERY_DIR/docker_hosts.txt"

        # Get Docker info
        remote_capture "$target" "$DISCOVERY_DIR/docker_info_${host}.txt" "docker info"

        # Get all containers
        remote_capture "$target" "$DISCOVERY_DIR/containers_${host}.txt" \
          "docker ps -a --format 'table {{.Names}}\t{{.Image}}\t{{.Status}}\t{{.Ports}}'"

        # Get all images
        remote_capture "$target" "$DISCOVERY_DIR/images_${host}.txt" \
          "docker images --format 'table {{.Repository}}\t{{.Tag}}\t{{.Size}}'"

        # Get all networks
        remote_capture "$target" "$DISCOVERY_DIR/networks_${host}.txt" "docker network ls"

        # Get all volumes
        remote_capture "$target" "$DISCOVERY_DIR/volumes_${host}.txt" "docker volume ls"
      else
        echo "$host:$user:NO_DOCKER" >> "$DISCOVERY_DIR/docker_hosts.txt"
      fi
    fi
  done < "$DISCOVERY_DIR/all_hosts.txt"
}

# Record systemd service listings for every known host (not only Docker hosts).
discover_systemd_services() {
  local host user target

  log "=== DISCOVERING SYSTEMD SERVICES ==="

  # Check each host for systemd services
  while IFS=: read -r host user; do
    if [[ -n "$host" && -n "$user" ]]; then
      target="$user@$host"
      log "Checking systemd services on $host (user: $user)..."

      # Get active services
      remote_capture "$target" "$DISCOVERY_DIR/active_services_${host}.txt" \
        "systemctl list-units --type=service --state=running --full --no-pager"

      # Get service descriptions
      remote_capture "$target" "$DISCOVERY_DIR/service_descriptions_${host}.txt" \
        "systemctl list-units --type=service --full --no-pager"

      # Get service dependencies
      remote_capture "$target" "$DISCOVERY_DIR/service_dependencies_${host}.txt" \
        "systemctl list-dependencies --type=service --full --no-pager"
    fi
  done < "$DISCOVERY_DIR/all_hosts.txt"
}

# For each Docker host, list running containers whose name or image looks like
# a database and collect environment, mounts and version for each of them.
discover_databases() {
  local host user status target
  local container_name image quoted_name details_file version_cmd

  log "=== DISCOVERING ALL DATABASES ==="

  # Check each Docker host for databases
  while IFS=: read -r host user status; do
    if [[ "$status" == *"DOCKER_AVAILABLE"* ]]; then
      target="$user@$host"
      log "Discovering databases on $host (user: $user)..."

      # Get containers that might be databases. "mongo" (not "mongodb") so
      # that the official "mongo" image is matched as well.
      remote_capture "$target" "$DISCOVERY_DIR/databases_${host}.txt" \
        "docker ps --format '{{.Names}} {{.Image}}' | grep -iE '(postgres|mysql|mariadb|redis|mongo|sqlite)'"

      # For each database container, get detailed info
      while read -r container_name image _; do
        [[ -n "$container_name" ]] || continue

        log "Analyzing database container: $container_name ($image) on $host"

        # Quote the name before embedding it in a remote command line.
        printf -v quoted_name '%q' "$container_name"
        details_file="$DISCOVERY_DIR/db_details_${host}_${container_name}.txt"

        # Get environment variables (may include credentials)
        remote_capture "$target" "$DISCOVERY_DIR/db_env_${host}_${container_name}.txt" \
          "docker inspect $quoted_name | jq '.[0].Config.Env[]' -r"

        # Get volume mounts
        remote_capture "$target" "$DISCOVERY_DIR/db_mounts_${host}_${container_name}.json" \
          "docker inspect $quoted_name | jq '.[0].Mounts[] | {Source: .Source, Destination: .Destination, Type: .Type}'"

        # Get database type and version
        {
          echo "Container: $container_name"
          echo "Image: $image"
          echo "Host: $host"
        } > "$details_file"

        # Try to get database version
        case "$image" in
          *postgres*) version_cmd="psql --version" ;;
          *mysql* | *mariadb*) version_cmd="mysql --version" ;;
          *redis*) version_cmd="redis-server --version" ;;
          *) version_cmd="" ;;
        esac
        if [[ -n "$version_cmd" ]]; then
          remote_append "$target" "$details_file" "docker exec $quoted_name $version_cmd"
        fi
      done < "$DISCOVERY_DIR/databases_${host}.txt"
    fi
  done < "$DISCOVERY_DIR/docker_hosts.txt"
}

# For each Docker host, record volume details, container mounts and a sample
# of paths that look like data stores.
discover_volumes() {
  local host user status target

  log "=== DISCOVERING ALL VOLUMES AND PERSISTENT DATA ==="

  # Check each Docker host for volumes
  while IFS=: read -r host user status; do
    if [[ "$status" == *"DOCKER_AVAILABLE"* ]]; then
      target="$user@$host"
      log "Discovering volumes on $host (user: $user)..."

      # Get all Docker volumes with details
      remote_capture "$target" "$DISCOVERY_DIR/volume_details_${host}.json" \
        "docker volume ls -q | xargs -I {} docker volume inspect {}"

      # Get bind mounts from all containers
      remote_capture "$target" "$DISCOVERY_DIR/bind_mounts_${host}.json" \
        "docker ps -q | xargs -I {} docker inspect {} | jq '.[] | {Name: .Name, Mounts: .Mounts}'"

      # Check for important directories that might contain data
      remote_capture "$target" "$DISCOVERY_DIR/important_dirs_${host}.txt" \
        "find /opt /var/lib /home /root -name '*.db' -o -name '*.sqlite' -o -name 'data' -o -name 'config' 2>/dev/null | head -50"
    fi
  done < "$DISCOVERY_DIR/docker_hosts.txt"
}

# List configuration files under $PROJECT_ROOT (local_configs.txt) and on each
# Docker host (configs_<host>.txt, compose_files_<host>.txt).
discover_configurations() {
  local host user status target

  log "=== DISCOVERING ALL CONFIGURATIONS ==="

  # Local configurations
  log "Discovering local configurations..."

  # Docker Compose files
  find "$PROJECT_ROOT" -name "*.yml" -o -name "*.yaml" -o -name "docker-compose*" > "$DISCOVERY_DIR/local_configs.txt"

  # Environment files
  find "$PROJECT_ROOT" -name "*.env" -o -name ".env*" >> "$DISCOVERY_DIR/local_configs.txt"

  # Configuration directories. The name tests are grouped so that -type d
  # applies to all of them, not just the first.
  find "$PROJECT_ROOT" -type d \( -name "config*" -o -name "conf*" -o -name "etc*" \) >> "$DISCOVERY_DIR/local_configs.txt"

  # Check each host for configurations
  while IFS=: read -r host user status; do
    if [[ "$status" == *"DOCKER_AVAILABLE"* ]]; then
      target="$user@$host"
      log "Discovering configurations on $host (user: $user)..."

      # Find configuration files
      remote_capture "$target" "$DISCOVERY_DIR/configs_${host}.txt" \
        "find /etc /opt /var/lib -name '*.conf' -o -name '*.yml' -o -name '*.yaml' -o -name '*.env' 2>/dev/null | head -100"

      # Get Docker Compose files
      remote_capture "$target" "$DISCOVERY_DIR/compose_files_${host}.txt" \
        "find /opt /root /home -name 'docker-compose*.yml' -o -name '*.stack.yml' 2>/dev/null"
    fi
  done < "$DISCOVERY_DIR/docker_hosts.txt"
}

# List local secret files and, per Docker host, Docker secrets, files whose
# names suggest secrets, and certificate/key files. Only names and paths are
# recorded, except for the secrets mapping file, which is copied verbatim.
discover_secrets() {
  local host user status target

  log "=== DISCOVERING ALL SECRETS AND SENSITIVE DATA ==="

  # Local secrets
  if [[ -d "$PROJECT_ROOT/secrets" ]]; then
    log "Discovering local secrets..."
    find "$PROJECT_ROOT/secrets" -type f > "$DISCOVERY_DIR/local_secrets.txt"

    # Get secrets mapping
    if [[ -f "$PROJECT_ROOT/secrets/docker-secrets-mapping.yaml" ]]; then
      cp "$PROJECT_ROOT/secrets/docker-secrets-mapping.yaml" "$DISCOVERY_DIR/"
    fi
  fi

  # Check each host for secrets
  while IFS=: read -r host user status; do
    if [[ "$status" == *"DOCKER_AVAILABLE"* ]]; then
      target="$user@$host"
      log "Discovering secrets on $host (user: $user)..."

      # Check for Docker secrets
      remote_capture "$target" "$DISCOVERY_DIR/secrets_${host}.txt" "docker secret ls"

      # Check for environment files with secrets
      remote_capture "$target" "$DISCOVERY_DIR/secret_files_${host}.txt" \
        "find /opt /root /home -name '.env*' -o -name '*secret*' -o -name '*password*' 2>/dev/null"

      # Check for SSL certificates
      remote_capture "$target" "$DISCOVERY_DIR/ssl_files_${host}.txt" \
        "find /etc /opt -name '*.crt' -o -name '*.key' -o -name '*.pem' 2>/dev/null"
    fi
  done < "$DISCOVERY_DIR/docker_hosts.txt"
}

# Capture routing table, interfaces and hosts file locally and on each Docker
# host, plus the Docker network list of each Docker host.
discover_network_configs() {
  local host user status target

  log "=== DISCOVERING ALL NETWORK CONFIGURATIONS ==="

  # Local network config. Best-effort like the remote steps: a missing `ip`
  # binary should not abort the whole discovery run under `set -e`.
  log "Discovering local network configuration..."
  ip route > "$DISCOVERY_DIR/local_routes.txt" \
    || log "WARNING: could not capture local routing table"
  ip addr > "$DISCOVERY_DIR/local_interfaces.txt" \
    || log "WARNING: could not capture local interfaces"
  cat /etc/hosts > "$DISCOVERY_DIR/local_hosts.txt" \
    || log "WARNING: could not capture local hosts file"

  # Check each host for network config
  while IFS=: read -r host user status; do
    if [[ "$status" == *"DOCKER_AVAILABLE"* ]]; then
      target="$user@$host"
      log "Discovering network configuration on $host (user: $user)..."

      # Network interfaces
      remote_capture "$target" "$DISCOVERY_DIR/interfaces_${host}.txt" "ip addr"

      # Routing table
      remote_capture "$target" "$DISCOVERY_DIR/routes_${host}.txt" "ip route"

      # Hosts file
      remote_capture "$target" "$DISCOVERY_DIR/hosts_${host}.txt" "cat /etc/hosts"

      # Docker networks
      remote_capture "$target" "$DISCOVERY_DIR/docker_networks_${host}.txt" "docker network ls"
    fi
  done < "$DISCOVERY_DIR/docker_hosts.txt"
}

# For each Docker host, list directories that look like application, user or
# media data, plus any path named after one of the known applications.
discover_user_data() {
  local host user status target

  log "=== DISCOVERING ALL USER DATA AND APPLICATIONS ==="

  # Check each host for user data
  while IFS=: read -r host user status; do
    if [[ "$status" == *"DOCKER_AVAILABLE"* ]]; then
      target="$user@$host"
      log "Discovering user data on $host (user: $user)..."

      # Check for common application data directories (names grouped so that
      # -type d applies to every pattern)
      remote_capture "$target" "$DISCOVERY_DIR/app_dirs_${host}.txt" \
        "find /opt /var/lib /home -type d \( -name '*data*' -o -name '*app*' -o -name '*user*' \) 2>/dev/null | head -50"

      # Check for specific application directories
      remote_capture "$target" "$DISCOVERY_DIR/specific_apps_${host}.txt" \
        "find /opt /var/lib -name '*nextcloud*' -o -name '*immich*' -o -name '*joplin*' -o -name '*photoprism*' 2>/dev/null"

      # Check for media directories (names grouped, see above)
      remote_capture "$target" "$DISCOVERY_DIR/media_dirs_${host}.txt" \
        "find /opt /var/lib -type d \( -name '*media*' -o -name '*photos*' -o -name '*videos*' -o -name '*music*' \) 2>/dev/null"
    fi
  done < "$DISCOVERY_DIR/docker_hosts.txt"
}

# For each Docker host and each known application, list directories named
# exactly after the application and files named <app>.db.
# The specific_apps_<host>.txt and media_dirs_<host>.txt listings are produced
# by discover_user_data and are not repeated here.
discover_application_data() {
  local host user status target app

  log "=== DISCOVERING ALL APPLICATION-SPECIFIC DATA ==="

  # Check each host for application-specific data
  while IFS=: read -r host user status; do
    if [[ "$status" == *"DOCKER_AVAILABLE"* ]]; then
      target="$user@$host"
      log "Discovering application-specific data on $host (user: $user)..."

      # Nextcloud, Immich, Joplin and PhotoPrism data
      for app in nextcloud immich joplin photoprism; do
        remote_capture "$target" "$DISCOVERY_DIR/${app}_data_${host}.txt" \
          "find /opt /var/lib -name '${app}' -type d -o -name '${app}.db' 2>/dev/null"
      done
    fi
  done < "$DISCOVERY_DIR/docker_hosts.txt"
}

# Print a file, or a fallback message when it is missing or empty.
# Arguments: $1 - file path, $2 - fallback text
print_file_or() {
  local file=$1 fallback=$2
  if [[ -s "$file" ]]; then
    cat "$file"
  else
    echo "$fallback"
  fi
}

# Print one "### <host>" section per result file for each given file-name
# prefix; the host is the part of the name between the prefix and ".txt".
# Arguments: one or more prefixes, e.g. "databases_"
summarize_files() {
  local prefix file host
  for prefix in "$@"; do
    for file in "$DISCOVERY_DIR/${prefix}"*.txt; do
      [[ -f "$file" ]] || continue
      host=${file##*/}
      host=${host#"$prefix"}
      host=${host%.txt}
      echo "### $host"
      sed 's/^/ - /' "$file"
      echo
    done
  done
}

# Print the number of lines in a file, or 0 when it does not exist.
count_lines() {
  local file=$1
  if [[ -f "$file" ]]; then
    wc -l < "$file"
  else
    echo "0"
  fi
}

# Print the number of lines in a file matching a pattern, or 0 when the file
# does not exist. grep -c already prints 0 on no match (exit status 1), so the
# status is ignored instead of printing a second "0".
count_matches() {
  local pattern=$1 file=$2
  if [[ -f "$file" ]]; then
    grep -c -- "$pattern" "$file" || true
  else
    echo "0"
  fi
}

# Print how many result files exist for a given file-name prefix.
count_files() {
  local prefix=$1 file total=0
  for file in "$DISCOVERY_DIR/${prefix}"*.txt; do
    [[ -f "$file" ]] || continue
    total=$((total + 1))
  done
  echo "$total"
}

# Print the total number of lines across all result files with a given prefix.
count_total_lines() {
  local prefix=$1 file lines total=0
  for file in "$DISCOVERY_DIR/${prefix}"*.txt; do
    [[ -f "$file" ]] || continue
    lines=$(wc -l < "$file")
    total=$((total + lines))
  done
  echo "$total"
}

# Write DISCOVERY_SUMMARY.md from the result files collected by the earlier
# stages.
generate_discovery_summary() {
  log "=== GENERATING DISCOVERY SUMMARY ==="

  cat > "$DISCOVERY_DIR/DISCOVERY_SUMMARY.md" << EOF
# Comprehensive Backup Target Discovery Summary

**Discovery Timestamp:** $DISCOVERY_TIMESTAMP
**Discovery Directory:** $DISCOVERY_DIR

## Hosts Discovered
$(print_file_or "$DISCOVERY_DIR/host_status.txt" "No host status found")

## Docker Environments
$(print_file_or "$DISCOVERY_DIR/docker_hosts.txt" "No Docker hosts found")

## Systemd Services
$(summarize_files "active_services_")

## Databases Found
$(summarize_files "databases_")

## Volumes and Persistent Data
$(summarize_files "volumes_")

## Configuration Files
- Local configurations: $(count_lines "$DISCOVERY_DIR/local_configs.txt")
- Environment files: $(count_matches "\.env" "$DISCOVERY_DIR/local_configs.txt")

## Secrets and SSL Certificates
- Local secrets: $(count_lines "$DISCOVERY_DIR/local_secrets.txt")
- SSL files across hosts: $(count_total_lines "ssl_files_")

## Network Configurations
- Local network config captured
- Network configs for $(count_files "interfaces_") hosts

## User Data and Applications
$(summarize_files "specific_apps_")

## Application-Specific Data
$(summarize_files "nextcloud_data_" "immich_data_" "joplin_data_" "photoprism_data_")

## Backup Requirements Summary

### Critical Data to Backup:
1. **Databases**: All PostgreSQL, MariaDB, Redis instances
2. **Volumes**: All Docker volumes and bind mounts
3. **Configurations**: All .env files, docker-compose files, config directories
4. **Secrets**: All SSL certificates, API keys, passwords
5. **User Data**: Nextcloud, Immich, Joplin, PhotoPrism data
6. **Network Configs**: Routing, interfaces, Docker networks
7. **Documentation**: All infrastructure documentation and scripts

### Estimated Backup Size:
- Configuration files: ~10-50MB
- Database dumps: ~100MB-1GB (depending on data)
- User data: ~1-10GB (depending on media)
- Total estimated: ~1-15GB

## Next Steps:
1. Review this discovery summary
2. Create comprehensive backup script based on discovered targets
3. Test backup process on non-critical data first
4. Execute full backup before migration
EOF

  log "Discovery summary generated: $DISCOVERY_DIR/DISCOVERY_SUMMARY.md"
}

# Execute main function
main "$@"