#!/bin/bash
#
# Backup Verification and Testing Script
#
# Validates backup integrity and tests restoration procedures for a backup
# snapshot directory:
#   * PostgreSQL / MySQL dumps under <snapshot>/database_dumps
#   * configuration archives   <snapshot>/config_backup_<host>.tar.gz
#   * per-host Docker state    <snapshot>/<host>/docker_*.txt
# and writes JSON results plus a Markdown report into $VERIFICATION_DIR.
#
# Usage: <script> [snapshot_directory]
#   With no argument, the target of the "$BACKUP_BASE_DIR/latest" symlink is
#   verified.
#
# Requires: docker, jq, gzip, tar.

# Import error handling library.
# NOTE(review): the log_* helpers, register_cleanup, register_rollback,
# validate_prerequisites, create_checkpoint, wait_for_service and LOG_DIR are
# not defined in this file; they are presumably provided by this library.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=lib/error_handling.sh
source "$SCRIPT_DIR/lib/error_handling.sh"

# Configuration
readonly BACKUP_BASE_DIR="/opt/migration/backups"
readonly VERIFICATION_DIR="/opt/migration/verification"
readonly TEST_RESTORE_DIR="/opt/migration/test_restore"
readonly VERIFICATION_LOG="$LOG_DIR/backup_verification_$(date +%Y%m%d_%H%M%S).log"

#######################################
# Print the number of lines matching a pattern.
# `grep -c` prints "0" AND exits non-zero when nothing matches, so the common
# `grep -c ... || echo 0` idiom yields "0<newline>0"; this helper always
# prints exactly one integer.
# Arguments: $1 - grep pattern
#            $2 - file to search (optional; stdin is read when omitted)
# Outputs:   the match count on stdout (0 if the file is unreadable)
#######################################
_count_matches() {
  local pattern=$1
  local count
  shift
  count=$(grep -c -- "$pattern" "$@" 2>/dev/null) || true
  printf '%s\n' "${count:-0}"
}

#######################################
# Print the size of a file in bytes (GNU stat first, BSD stat as fallback).
# Arguments: $1 - file path
# Outputs:   size in bytes on stdout, 0 if it cannot be determined
#######################################
_file_size() {
  local file=$1
  local size
  size=$(stat -c%s -- "$file" 2>/dev/null) \
    || size=$(stat -f%z -- "$file" 2>/dev/null) \
    || size=0
  printf '%s\n' "${size:-0}"
}

#######################################
# Append one JSON object to an array inside a results file.
# The update goes through a temp file so a jq failure never truncates the
# existing results.
# Arguments: $1 - results file
#            $2 - name of the top-level array key (e.g. "dumps")
#            $3 - JSON text of the entry to append
# Returns:   0 on success, 1 if the entry could not be recorded
#######################################
_append_result() {
  local results_file=$1
  local array_key=$2
  local entry_json=$3

  if jq --arg key "$array_key" --argjson entry "$entry_json" \
    '.[$key] += [$entry]' "$results_file" > "${results_file}.tmp" 2>/dev/null; then
    mv -- "${results_file}.tmp" "$results_file"
  else
    rm -f -- "${results_file}.tmp"
    log_warn "Could not record verification result in $results_file"
    return 1
  fi
}

#######################################
# Remove the test-restore directory and any leftover verification Docker
# containers/networks. Best-effort: Docker errors are deliberately ignored.
# Globals: TEST_RESTORE_DIR (read)
#######################################
cleanup_verification() {
  log_info "Cleaning up verification directories..."

  if [[ -d "$TEST_RESTORE_DIR" ]]; then
    # ':?' aborts instead of running 'rm -rf' on an empty path.
    rm -rf -- "${TEST_RESTORE_DIR:?}"
    log_info "Removed test restore directory"
  fi

  # Clean up any temporary Docker containers
  docker ps -a --filter "name=verification_test_*" -q \
    | xargs -r docker rm -f 2>/dev/null || true

  # Clean up any temporary networks
  docker network ls --filter "name=verification_*" -q \
    | xargs -r docker network rm 2>/dev/null || true
}

#######################################
# Rollback hook: run the normal cleanup, then stop any verification
# containers that are still running.
#######################################
rollback_verification() {
  log_info "Rolling back verification processes..."
  cleanup_verification

  # Stop any running verification containers
  docker ps --filter "name=verification_*" -q \
    | xargs -r docker stop 2>/dev/null || true
}

#######################################
# Verify the database dumps of a snapshot.
# PostgreSQL dumps are checked for size, header/footer markers and
# table/COPY counts, then restore-tested in a throwaway container; each
# result is appended to database_verification.json. MySQL dumps get a
# size/header check only and are not recorded in the JSON file.
# Globals:   VERIFICATION_DIR (read)
# Arguments: $1 - snapshot directory
# Returns:   1 if the dump directory is missing, 0 otherwise (per-dump
#            failures are logged and recorded, not returned)
#######################################
verify_database_dumps() {
  local snapshot_dir=$1
  local dump_dir="$snapshot_dir/database_dumps"

  log_step "Verifying database dumps in $dump_dir..."

  if [[ ! -d "$dump_dir" ]]; then
    log_error "Database dump directory not found: $dump_dir"
    return 1
  fi

  local verification_results="$VERIFICATION_DIR/database_verification.json"
  echo '{"dumps": []}' > "$verification_results"

  local dump_file host size has_header has_footer table_count data_count
  local database_count restore_success dump_result

  # Verify PostgreSQL dumps
  for dump_file in "$dump_dir"/postgres_dump_*.sql; do
    # An unmatched glob stays literal; skip it.
    [[ -f "$dump_file" ]] || continue

    host=$(basename "$dump_file" .sql | sed 's/postgres_dump_//')
    log_info "Verifying PostgreSQL dump for $host..."

    # Check file size
    size=$(_file_size "$dump_file")

    # Check file content structure
    has_header=$(_count_matches "PostgreSQL database dump" \
      < <(head -n 5 -- "$dump_file" 2>/dev/null))
    has_footer=$(_count_matches "PostgreSQL database dump complete" \
      < <(tail -n 5 -- "$dump_file" 2>/dev/null))
    table_count=$(_count_matches "CREATE TABLE" "$dump_file")
    data_count=$(_count_matches "COPY.*FROM stdin" "$dump_file")

    # Test dump restoration
    restore_success="false"
    if test_postgres_restore "$dump_file" "$host"; then
      restore_success="true"
    fi

    # Build the result with jq so host/file names are escaped correctly
    # instead of being spliced into the jq program text.
    dump_result=$(jq -n \
      --arg host "$host" \
      --arg file "$dump_file" \
      --argjson size_bytes "$size" \
      --argjson has_header "$has_header" \
      --argjson has_footer "$has_footer" \
      --argjson table_count "$table_count" \
      --argjson data_count "$data_count" \
      --argjson restore_test "$restore_success" \
      --arg verification_time "$(date -Iseconds)" \
      '{
        host: $host,
        file: $file,
        size_bytes: $size_bytes,
        has_header: $has_header,
        has_footer: $has_footer,
        table_count: $table_count,
        data_count: $data_count,
        restore_test: $restore_test,
        verification_time: $verification_time
      }') || dump_result=""

    # Add to results JSON (best-effort; a failure is logged by the helper)
    _append_result "$verification_results" "dumps" "$dump_result" || true

    if [[ $size -gt 1000 && $has_header -gt 0 && $restore_success == "true" ]]; then
      log_success "✅ PostgreSQL dump verified for $host: ${size} bytes, ${table_count} tables"
    else
      log_error "❌ PostgreSQL dump verification failed for $host"
    fi
  done

  # Verify MySQL dumps
  for dump_file in "$dump_dir"/mysql_dump_*.sql; do
    [[ -f "$dump_file" ]] || continue

    host=$(basename "$dump_file" .sql | sed 's/mysql_dump_//')
    log_info "Verifying MySQL dump for $host..."

    size=$(_file_size "$dump_file")
    has_header=$(_count_matches "MySQL dump" \
      < <(head -n 10 -- "$dump_file" 2>/dev/null))
    database_count=$(_count_matches "CREATE DATABASE" "$dump_file")

    if [[ $size -gt 1000 && $has_header -gt 0 ]]; then
      log_success "✅ MySQL dump verified for $host: ${size} bytes, ${database_count} databases"
    else
      log_warn "⚠️ MySQL dump may have issues for $host"
    fi
  done

  log_success "Database dump verification completed"
  return 0
}

#######################################
# Restore a PostgreSQL dump into a temporary postgres:13 container and check
# that at least one table exists in the public schema afterwards.
# Arguments: $1 - path to the SQL dump
#            $2 - host label (used in log messages and the container name)
# Returns:   0 if the restore produced at least one table, 1 otherwise
#######################################
test_postgres_restore() {
  local dump_file=$1
  local host=$2

  log_info "Testing PostgreSQL restoration for $host..."

  # Create temporary PostgreSQL container for testing. Docker only accepts
  # [a-zA-Z0-9_.-] in container names, so other characters are replaced.
  local safe_host=${host//[^a-zA-Z0-9_.-]/_}
  local test_container="verification_test_postgres_${safe_host}"
  local test_network="verification_network"

  # Create test network (it may already exist from an earlier test)
  docker network create "$test_network" >/dev/null 2>&1 || true

  # A container left over from an interrupted run would make 'docker run'
  # fail on the name clash.
  docker rm -f "$test_container" >/dev/null 2>&1 || true

  # Start temporary PostgreSQL container
  if ! docker run -d \
    --name "$test_container" \
    --network "$test_network" \
    -e POSTGRES_PASSWORD=testpass \
    -e POSTGRES_DB=testdb \
    postgres:13 >/dev/null 2>&1; then
    log_error "Failed to create PostgreSQL test container for $host"
    return 1
  fi

  local status=1
  local table_count

  # Wait for PostgreSQL to be ready, then attempt restoration.
  # NOTE(review): psql runs without ON_ERROR_STOP, so SQL errors inside the
  # dump do not fail the restore step; only the table count below detects an
  # empty result. Left as-is because cluster-style dumps may legitimately hit
  # "already exists" errors - confirm the desired strictness.
  if ! wait_for_service "PostgreSQL-$host" \
    "docker exec $test_container pg_isready -U postgres" 60 5; then
    log_error "PostgreSQL container failed to start for $host test"
  elif ! docker exec -i "$test_container" \
    psql -U postgres -d testdb < "$dump_file" >/dev/null 2>&1; then
    log_error "PostgreSQL dump restoration failed for $host"
  else
    # Verify data was restored
    table_count=$(docker exec "$test_container" psql -U postgres -d testdb -t -c \
      "SELECT count(*) FROM information_schema.tables WHERE table_schema='public';" \
      2>/dev/null | tr -d '[:space:]') || true
    [[ "$table_count" =~ ^[0-9]+$ ]] || table_count=0

    if [[ $table_count -gt 0 ]]; then
      log_success "PostgreSQL dump restoration test passed for $host ($table_count tables)"
      status=0
    else
      log_warn "PostgreSQL dump restored but no tables found for $host"
    fi
  fi

  # Cleanup
  docker rm -f "$test_container" >/dev/null 2>&1 || true
  return "$status"
}

#######################################
# Verify configuration archives (config_backup_<host>.tar.gz) of a snapshot:
# gzip integrity plus a trial extraction into TEST_RESTORE_DIR. Results are
# appended to config_verification.json.
# Globals:   VERIFICATION_DIR, TEST_RESTORE_DIR (read)
# Arguments: $1 - snapshot directory
# Returns:   0 always (per-archive failures are logged and recorded)
#######################################
verify_configuration_backups() {
  local snapshot_dir=$1

  log_step "Verifying configuration backups in $snapshot_dir..."

  local verification_results="$VERIFICATION_DIR/config_verification.json"
  echo '{"configs": []}' > "$verification_results"

  local config_backup host size is_valid_gzip extraction_test
  local test_extract_dir extracted_files config_result

  for config_backup in "$snapshot_dir"/config_backup_*.tar.gz; do
    [[ -f "$config_backup" ]] || continue

    host=$(basename "$config_backup" .tar.gz | sed 's/config_backup_//')
    log_info "Verifying configuration backup for $host..."

    # Check file integrity
    size=$(_file_size "$config_backup")
    is_valid_gzip="false"
    if gzip -t "$config_backup" 2>/dev/null; then
      is_valid_gzip="true"
      log_success "✅ Configuration backup is valid gzip for $host"
    else
      log_error "❌ Configuration backup is corrupted for $host"
    fi

    # Test extraction
    extraction_test="false"
    test_extract_dir="$TEST_RESTORE_DIR/config_$host"
    mkdir -p "$test_extract_dir"

    if tar -tzf "$config_backup" >/dev/null 2>&1 \
      && tar -xzf "$config_backup" -C "$test_extract_dir" 2>/dev/null; then
      # tr strips the padding some wc implementations add.
      extracted_files=$(find "$test_extract_dir" -type f | wc -l | tr -d '[:space:]')
      if [[ ${extracted_files:-0} -gt 0 ]]; then
        extraction_test="true"
        log_success "Configuration backup extraction test passed for $host ($extracted_files files)"
      fi
    fi

    # Update verification results
    config_result=$(jq -n \
      --arg host "$host" \
      --arg file "$config_backup" \
      --argjson size_bytes "$size" \
      --argjson is_valid_gzip "$is_valid_gzip" \
      --argjson extraction_test "$extraction_test" \
      --arg verification_time "$(date -Iseconds)" \
      '{
        host: $host,
        file: $file,
        size_bytes: $size_bytes,
        is_valid_gzip: $is_valid_gzip,
        extraction_test: $extraction_test,
        verification_time: $verification_time
      }') || config_result=""

    _append_result "$verification_results" "configs" "$config_result" || true

    # Cleanup test extraction
    rm -rf -- "${test_extract_dir:?}" 2>/dev/null || true
  done

  log_success "Configuration backup verification completed"
  return 0
}

#######################################
# Summarise the saved Docker state of every host directory in a snapshot
# (every sub-directory except "database_dumps"): counts non-empty lines in
# docker_{containers,images,networks,volumes}.txt and compose files under
# compose_files/. Results are appended to docker_verification.json.
# Globals:   VERIFICATION_DIR (read)
# Arguments: $1 - snapshot directory
# Returns:   0 always
#######################################
verify_docker_state_backups() {
  local snapshot_dir=$1

  log_step "Verifying Docker state backups..."

  local verification_results="$VERIFICATION_DIR/docker_verification.json"
  echo '{"hosts": []}' > "$verification_results"

  local host_dir host docker_result
  local container_count image_count network_count volume_count compose_files

  for host_dir in "$snapshot_dir"/*; do
    [[ -d "$host_dir" ]] || continue
    host=$(basename "$host_dir")
    [[ "$host" != "database_dumps" ]] || continue

    log_info "Verifying Docker state for $host..."

    # '.' matches every non-empty line; a missing file counts as 0.
    container_count=$(_count_matches '.' "$host_dir/docker_containers.txt")
    image_count=$(_count_matches '.' "$host_dir/docker_images.txt")
    network_count=$(_count_matches '.' "$host_dir/docker_networks.txt")
    volume_count=$(_count_matches '.' "$host_dir/docker_volumes.txt")

    # Check for compose files
    compose_files=0
    if [[ -d "$host_dir/compose_files" ]]; then
      compose_files=$(find "$host_dir/compose_files" \
        \( -name "*.yml" -o -name "*.yaml" \) | wc -l | tr -d '[:space:]')
    fi

    docker_result=$(jq -n \
      --arg host "$host" \
      --argjson containers "$container_count" \
      --argjson images "$image_count" \
      --argjson networks "$network_count" \
      --argjson volumes "$volume_count" \
      --argjson compose_files "${compose_files:-0}" \
      --arg verification_time "$(date -Iseconds)" \
      '{
        host: $host,
        containers: $containers,
        images: $images,
        networks: $networks,
        volumes: $volumes,
        compose_files: $compose_files,
        verification_time: $verification_time
      }') || docker_result=""

    _append_result "$verification_results" "hosts" "$docker_result" || true

    log_success "✅ Docker state verified for $host: $container_count containers, $image_count images"
  done

  log_success "Docker state verification completed"
  return 0
}

#######################################
# Build a Markdown report from the JSON result files in VERIFICATION_DIR.
# Globals:   VERIFICATION_DIR, VERIFICATION_LOG (read)
# Arguments: $1 - snapshot directory (only echoed into the report)
# Outputs:   the report path on stdout. Callers capture stdout, so log calls
#            in this function are redirected to stderr to keep the captured
#            value clean in case the log helpers print to stdout.
#######################################
create_verification_report() {
  local snapshot_dir=$1
  local report_file
  report_file="$VERIFICATION_DIR/verification_report_$(date +%Y%m%d_%H%M%S).md"

  local db_json="$VERIFICATION_DIR/database_verification.json"
  local config_json="$VERIFICATION_DIR/config_verification.json"
  local docker_json="$VERIFICATION_DIR/docker_verification.json"

  log_step "Creating comprehensive verification report..." >&2

  cat > "$report_file" << EOF
# Backup Verification Report

**Generated:** $(date)
**Snapshot Directory:** $snapshot_dir
**Verification Directory:** $VERIFICATION_DIR

## Executive Summary

EOF

  # Database verification summary
  if [[ -f "$db_json" ]]; then
    local total_db_dumps successful_restores
    total_db_dumps=$(jq '.dumps | length' "$db_json")
    successful_restores=$(jq '.dumps | map(select(.restore_test == true)) | length' "$db_json")
    echo "- **Database Dumps:** $total_db_dumps total, $successful_restores passed restore tests" >> "$report_file"
  fi

  # Configuration verification summary
  if [[ -f "$config_json" ]]; then
    local total_configs valid_configs
    total_configs=$(jq '.configs | length' "$config_json")
    valid_configs=$(jq '.configs | map(select(.is_valid_gzip == true and .extraction_test == true)) | length' "$config_json")
    echo "- **Configuration Backups:** $total_configs total, $valid_configs verified" >> "$report_file"
  fi

  # Docker verification summary
  if [[ -f "$docker_json" ]]; then
    local total_hosts total_containers
    total_hosts=$(jq '.hosts | length' "$docker_json")
    # 'add' yields null for an empty list; fall back to 0.
    total_containers=$(jq '.hosts | map(.containers) | add // 0' "$docker_json")
    echo "- **Docker States:** $total_hosts hosts, $total_containers total containers" >> "$report_file"
  fi

  cat >> "$report_file" << EOF

## Detailed Results

### Database Verification

EOF

  # Database details
  if [[ -f "$db_json" ]]; then
    jq -r '.dumps[] | "- **\(.host)**: \(.size_bytes) bytes, \(.table_count) tables, restore test: \(.restore_test)"' \
      "$db_json" >> "$report_file"
  fi

  cat >> "$report_file" << EOF

### Configuration Verification

EOF

  # Configuration details
  if [[ -f "$config_json" ]]; then
    jq -r '.configs[] | "- **\(.host)**: \(.size_bytes) bytes, valid: \(.is_valid_gzip), extractable: \(.extraction_test)"' \
      "$config_json" >> "$report_file"
  fi

  cat >> "$report_file" << EOF

### Docker State Verification

EOF

  # Docker details
  if [[ -f "$docker_json" ]]; then
    jq -r '.hosts[] | "- **\(.host)**: \(.containers) containers, \(.images) images, \(.compose_files) compose files"' \
      "$docker_json" >> "$report_file"
  fi

  cat >> "$report_file" << EOF

## Recommendations

### Critical Issues

EOF

  # Identify critical issues
  local critical_issues=0

  if [[ -f "$db_json" ]]; then
    local failed_restores
    failed_restores=$(jq '.dumps | map(select(.restore_test == false)) | length' "$db_json")
    if [[ ${failed_restores:-0} -gt 0 ]]; then
      echo "- ❌ **$failed_restores database dumps failed restore tests** - Re-create these backups" >> "$report_file"
      # Not '((critical_issues++))': that returns status 1 when the old
      # value is 0, which aborts the script under 'set -e' / an ERR trap.
      critical_issues=$((critical_issues + 1))
    fi
  fi

  if [[ -f "$config_json" ]]; then
    local invalid_configs
    invalid_configs=$(jq '.configs | map(select(.is_valid_gzip == false or .extraction_test == false)) | length' "$config_json")
    if [[ ${invalid_configs:-0} -gt 0 ]]; then
      echo "- ❌ **$invalid_configs configuration backups are corrupted** - Re-create these backups" >> "$report_file"
      critical_issues=$((critical_issues + 1))
    fi
  fi

  if [[ $critical_issues -eq 0 ]]; then
    echo "- ✅ **No critical issues identified** - All backups are valid and restorable" >> "$report_file"
  fi

  cat >> "$report_file" << EOF

### Next Steps

1. **Address Critical Issues:** Fix any failed backups before proceeding
2. **Test Full Restoration:** Perform end-to-end restoration test in staging
3. **Document Procedures:** Update restoration procedures based on findings
4. **Schedule Regular Verification:** Implement automated backup verification

## Files and Logs

- **Verification Log:** $VERIFICATION_LOG
- **Database Results:** $VERIFICATION_DIR/database_verification.json
- **Config Results:** $VERIFICATION_DIR/config_verification.json
- **Docker Results:** $VERIFICATION_DIR/docker_verification.json

EOF

  log_success "Verification report created: $report_file" >&2
  echo "$report_file"
}

#######################################
# Run every verification stage against a snapshot and print a summary.
# Globals:   BACKUP_BASE_DIR, VERIFICATION_DIR, TEST_RESTORE_DIR (read)
# Arguments: $1 - snapshot directory (default: $BACKUP_BASE_DIR/latest)
# Returns:   1 if the snapshot directory is missing or a stage fails
#######################################
run_full_verification() {
  local snapshot_dir=${1:-"$BACKUP_BASE_DIR/latest"}

  if [[ ! -d "$snapshot_dir" ]]; then
    log_error "Snapshot directory not found: $snapshot_dir"
    return 1
  fi

  log_step "Starting full backup verification for: $snapshot_dir"

  # Create verification directory
  mkdir -p "$VERIFICATION_DIR"
  mkdir -p "$TEST_RESTORE_DIR"

  # Register cleanup and rollback
  register_cleanup cleanup_verification
  register_rollback rollback_verification

  # Validate prerequisites
  validate_prerequisites docker jq gzip tar

  # Create checkpoint
  create_checkpoint "verification_start"

  # Verify database dumps
  if verify_database_dumps "$snapshot_dir"; then
    create_checkpoint "database_verification_complete"
  else
    log_error "Database verification failed"
    return 1
  fi

  # Verify configuration backups
  if verify_configuration_backups "$snapshot_dir"; then
    create_checkpoint "config_verification_complete"
  else
    log_error "Configuration verification failed"
    return 1
  fi

  # Verify Docker state backups
  if verify_docker_state_backups "$snapshot_dir"; then
    create_checkpoint "docker_verification_complete"
  else
    log_error "Docker verification failed"
    return 1
  fi

  # Create comprehensive report
  local report_file
  report_file=$(create_verification_report "$snapshot_dir")

  # Final summary
  log_success "✅ Backup verification completed successfully!"
  log_info "📊 Verification report: $report_file"

  # Display summary
  if [[ -f "$report_file" ]]; then
    echo ""
    echo "=== VERIFICATION SUMMARY ==="
    head -n 20 -- "$report_file"
    echo ""
    echo "Full report available at: $report_file"
  fi
}

#######################################
# Entry point: resolve the snapshot to verify and run the verification.
# Arguments: $1 - snapshot directory (optional; defaults to the target of
#                 the "$BACKUP_BASE_DIR/latest" symlink)
#######################################
main() {
  local snapshot_dir=${1:-""}

  if [[ -z "$snapshot_dir" ]]; then
    # Use latest snapshot if no directory specified
    if [[ -L "$BACKUP_BASE_DIR/latest" ]]; then
      snapshot_dir=$(readlink -f "$BACKUP_BASE_DIR/latest")
      log_info "Using latest snapshot: $snapshot_dir"
    else
      log_error "No snapshot directory specified and no 'latest' link found"
      log_info "Usage: $0 [snapshot_directory]"
      log_info "Available snapshots:"
      ls -la "$BACKUP_BASE_DIR"/snapshot_* 2>/dev/null || echo "No snapshots found"
      exit 1
    fi
  fi

  run_full_verification "$snapshot_dir"
}

# Execute main function
main "$@"