Files
HomeAudit/migration_scripts/scripts/backup_verification.sh
2025-08-24 11:13:39 -04:00

526 lines
19 KiB
Bash
Executable File

#!/bin/bash
# Backup Verification and Testing Script
# Validates backup integrity and tests restoration procedures
#
# Usage: <this script> [snapshot_directory]
#   With no argument, main() falls back to the "latest" symlink under
#   BACKUP_BASE_DIR.
#
# Helpers such as log_info/log_step/log_success/log_warn/log_error,
# wait_for_service, register_cleanup, register_rollback,
# validate_prerequisites and create_checkpoint are called below but are not
# defined in this file; presumably they come from the library sourced here
# (TODO confirm against lib/error_handling.sh).

# Import error handling library
# Resolve the directory containing this script so the library is found
# regardless of the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/lib/error_handling.sh"
# Configuration
# Root directory that holds the snapshots and the "latest" symlink.
readonly BACKUP_BASE_DIR="/opt/migration/backups"
# Destination for the JSON result files and the Markdown report.
readonly VERIFICATION_DIR="/opt/migration/verification"
# Scratch area for test extractions; removed by cleanup_verification.
readonly TEST_RESTORE_DIR="/opt/migration/test_restore"
# Timestamped log path. LOG_DIR is not set in this file; it is assumed to be
# provided by the sourced library (TODO confirm). In the code visible here the
# value is only referenced in the generated report.
readonly VERIFICATION_LOG="$LOG_DIR/backup_verification_$(date +%Y%m%d_%H%M%S).log"
#######################################
# Remove the artefacts a verification run leaves behind.
# Globals:   TEST_RESTORE_DIR (read)
# Arguments: none
# Outputs:   progress messages through log_info
# Returns:   0; the Docker cleanup steps are best effort
#######################################
cleanup_verification() {
  log_info "Cleaning up verification directories..."

  # Drop the scratch restore tree if a previous step created it.
  if [[ -d "$TEST_RESTORE_DIR" ]]; then
    rm -rf "$TEST_RESTORE_DIR"
    log_info "Removed test restore directory"
  fi

  # Force-remove leftover test containers. xargs -r skips the docker call
  # when nothing is listed; failures are deliberately ignored.
  docker ps -a --filter "name=verification_test_*" -q \
    | xargs -r docker rm -f 2>/dev/null \
    || true

  # Same treatment for the temporary verification networks.
  docker network ls --filter "name=verification_*" -q \
    | xargs -r docker network rm 2>/dev/null \
    || true
}
#######################################
# Roll back a verification run: perform the regular cleanup, then stop any
# container whose name still matches the verification prefix.
# Arguments: none
# Outputs:   progress message through log_info
# Returns:   0; stopping containers is best effort
#######################################
rollback_verification() {
  local -r container_filter="name=verification_*"

  log_info "Rolling back verification processes..."
  cleanup_verification

  # Stop any running verification containers; errors are ignored on purpose.
  docker ps --filter "$container_filter" -q \
    | xargs -r docker stop 2>/dev/null \
    || true
}
#######################################
# Verify the database dumps stored in a snapshot.
#
# For each postgres_dump_<host>.sql the function records size, header/footer
# markers, CREATE TABLE / COPY counts and the outcome of a restore test in
# $VERIFICATION_DIR/database_verification.json. For each
# mysql_dump_<host>.sql only size and header are checked and logged; no JSON
# entry is written for MySQL dumps.
#
# Globals:   VERIFICATION_DIR (read)
# Arguments: $1 - snapshot directory containing a database_dumps/ folder
# Outputs:   progress through the log_* helpers; JSON results file
# Returns:   1 if the database_dumps directory is missing, otherwise 0
#            (individual dump failures are logged and recorded, not returned)
#######################################
verify_database_dumps() {
  local snapshot_dir=$1
  local dump_dir="$snapshot_dir/database_dumps"
  local verification_results="$VERIFICATION_DIR/database_verification.json"
  local dump_file host size restore_success
  local has_header has_footer table_count data_count database_count

  log_step "Verifying database dumps in $dump_dir..."

  if [[ ! -d "$dump_dir" ]]; then
    log_error "Database dump directory not found: $dump_dir"
    return 1
  fi

  echo '{"dumps": []}' > "$verification_results"

  # Verify PostgreSQL dumps
  for dump_file in "$dump_dir"/postgres_dump_*.sql; do
    # An unmatched glob stays literal; skip anything that is not a file.
    [[ -f "$dump_file" ]] || continue

    host=${dump_file##*/}
    host=${host%.sql}
    host=${host#postgres_dump_}
    log_info "Verifying PostgreSQL dump for $host..."

    # Check file size (BSD stat first, then GNU stat)
    size=$(stat -f%z "$dump_file" 2>/dev/null \
      || stat -c%s "$dump_file" 2>/dev/null \
      || echo "0")
    size=${size:-0}

    # Check file content structure.
    # grep -c already prints "0" when nothing matches (and exits 1), so the
    # fallback must not print a second "0"; "|| true" only absorbs the status.
    has_header=$(head -5 "$dump_file" | grep -c "PostgreSQL database dump" || true)
    has_footer=$(tail -5 "$dump_file" | grep -c "PostgreSQL database dump complete" || true)
    table_count=$(grep -c "CREATE TABLE" "$dump_file" || true)
    data_count=$(grep -c "COPY.*FROM stdin" "$dump_file" || true)
    has_header=${has_header:-0}
    has_footer=${has_footer:-0}
    table_count=${table_count:-0}
    data_count=${data_count:-0}

    # Test dump restoration
    restore_success="false"
    if test_postgres_restore "$dump_file" "$host"; then
      restore_success="true"
    fi

    # Add to results JSON. Values are passed as jq variables so that quotes
    # or backslashes in host/file names cannot break the JSON document.
    if jq \
      --arg host "$host" \
      --arg file "$dump_file" \
      --argjson size_bytes "$size" \
      --argjson has_header "$has_header" \
      --argjson has_footer "$has_footer" \
      --argjson table_count "$table_count" \
      --argjson data_count "$data_count" \
      --argjson restore_test "$restore_success" \
      --arg verification_time "$(date -Iseconds)" \
      '.dumps += [{
        host: $host,
        file: $file,
        size_bytes: $size_bytes,
        has_header: $has_header,
        has_footer: $has_footer,
        table_count: $table_count,
        data_count: $data_count,
        restore_test: $restore_test,
        verification_time: $verification_time
      }]' "$verification_results" > "${verification_results}.tmp"; then
      mv "${verification_results}.tmp" "$verification_results"
    else
      log_warn "Could not record verification result for $host in $verification_results"
    fi

    if [[ $size -gt 1000 ]] && [[ $has_header -gt 0 ]] && [[ $restore_success == "true" ]]; then
      log_success "✅ PostgreSQL dump verified for $host: ${size} bytes, ${table_count} tables"
    else
      log_error "❌ PostgreSQL dump verification failed for $host"
    fi
  done

  # Verify MySQL dumps
  for dump_file in "$dump_dir"/mysql_dump_*.sql; do
    [[ -f "$dump_file" ]] || continue

    host=${dump_file##*/}
    host=${host%.sql}
    host=${host#mysql_dump_}
    log_info "Verifying MySQL dump for $host..."

    size=$(stat -f%z "$dump_file" 2>/dev/null \
      || stat -c%s "$dump_file" 2>/dev/null \
      || echo "0")
    size=${size:-0}
    has_header=$(head -10 "$dump_file" | grep -c "MySQL dump" || true)
    database_count=$(grep -c "CREATE DATABASE" "$dump_file" || true)
    has_header=${has_header:-0}
    database_count=${database_count:-0}

    if [[ $size -gt 1000 ]] && [[ $has_header -gt 0 ]]; then
      log_success "✅ MySQL dump verified for $host: ${size} bytes, ${database_count} databases"
    else
      log_warn "⚠️ MySQL dump may have issues for $host"
    fi
  done

  log_success "Database dump verification completed"
  return 0
}
#######################################
# Restore a PostgreSQL dump into a throwaway postgres:13 container and check
# that at least one table exists in the public schema afterwards.
#
# Arguments: $1 - path to the SQL dump file
#            $2 - host label, used in the container name and log messages
# Outputs:   progress through the log_* helpers
# Returns:   0 when the restore ran and tables were found, 1 otherwise
# Notes:     wait_for_service is defined outside this file; the call passes
#            a label, a readiness command string and the numbers 60 and 5
#            (presumably timeout and poll interval - TODO confirm).
#######################################
test_postgres_restore() {
  local dump_file=$1
  local host=$2
  local container="verification_test_postgres_$host"
  local network="verification_network"
  local restored_tables
  local outcome=1

  log_info "Testing PostgreSQL restoration for $host..."

  # The network may already exist from an earlier test; that is not an error.
  docker network create "$network" 2>/dev/null || true

  # Without a container there is nothing to test and nothing to clean up.
  if ! docker run -d \
    --name "$container" \
    --network "$network" \
    -e POSTGRES_PASSWORD=testpass \
    -e POSTGRES_DB=testdb \
    postgres:13 >/dev/null 2>&1; then
    log_error "Failed to create PostgreSQL test container for $host"
    return 1
  fi

  if ! wait_for_service "PostgreSQL-$host" "docker exec $container pg_isready -U postgres" 60 5; then
    log_error "PostgreSQL container failed to start for $host test"
  elif ! docker exec -i "$container" psql -U postgres -d testdb < "$dump_file" >/dev/null 2>&1; then
    log_error "PostgreSQL dump restoration failed for $host"
  else
    # xargs trims the padding psql -t puts around the number.
    restored_tables=$(docker exec "$container" psql -U postgres -d testdb -t -c "SELECT count(*) FROM information_schema.tables WHERE table_schema='public';" 2>/dev/null | xargs || echo "0")
    if [[ $restored_tables -gt 0 ]]; then
      log_success "PostgreSQL dump restoration test passed for $host ($restored_tables tables)"
      outcome=0
    else
      log_warn "PostgreSQL dump restored but no tables found for $host"
    fi
  fi

  # Single cleanup point for every path on which the container was started.
  docker rm -f "$container" >/dev/null 2>&1
  return "$outcome"
}
#######################################
# Verify the config_backup_<host>.tar.gz archives in a snapshot.
#
# Each archive is checked with gzip -t, listed with tar, and extracted into a
# scratch directory under TEST_RESTORE_DIR; the outcome is appended to
# $VERIFICATION_DIR/config_verification.json.
#
# Globals:   VERIFICATION_DIR (read), TEST_RESTORE_DIR (read)
# Arguments: $1 - snapshot directory
# Outputs:   progress through the log_* helpers; JSON results file
# Returns:   0 always; per-archive failures are logged and recorded
#######################################
verify_configuration_backups() {
  local snapshot_dir=$1
  local verification_results="$VERIFICATION_DIR/config_verification.json"
  local config_backup host size is_valid_gzip extraction_test
  local test_extract_dir extracted_files

  log_step "Verifying configuration backups in $snapshot_dir..."
  echo '{"configs": []}' > "$verification_results"

  for config_backup in "$snapshot_dir"/config_backup_*.tar.gz; do
    # An unmatched glob stays literal; skip anything that is not a file.
    [[ -f "$config_backup" ]] || continue

    host=${config_backup##*/}
    host=${host%.tar.gz}
    host=${host#config_backup_}
    log_info "Verifying configuration backup for $host..."

    # Check file integrity (BSD stat first, then GNU stat)
    size=$(stat -f%z "$config_backup" 2>/dev/null \
      || stat -c%s "$config_backup" 2>/dev/null \
      || echo "0")
    size=${size:-0}

    is_valid_gzip="false"
    if gzip -t "$config_backup" 2>/dev/null; then
      is_valid_gzip="true"
      log_success "✅ Configuration backup is valid gzip for $host"
    else
      log_error "❌ Configuration backup is corrupted for $host"
    fi

    # Test extraction: the archive must list cleanly, extract cleanly and
    # yield at least one regular file.
    extraction_test="false"
    test_extract_dir="$TEST_RESTORE_DIR/config_$host"
    mkdir -p "$test_extract_dir"
    if tar -tzf "$config_backup" >/dev/null 2>&1 \
      && tar -xzf "$config_backup" -C "$test_extract_dir" 2>/dev/null; then
      extracted_files=$(find "$test_extract_dir" -type f | wc -l)
      # Arithmetic expansion drops the padding some wc versions add.
      extracted_files=$((extracted_files))
      if ((extracted_files > 0)); then
        extraction_test="true"
        log_success "Configuration backup extraction test passed for $host ($extracted_files files)"
      fi
    fi

    # Update verification results. Values travel as jq variables so quotes
    # or backslashes in host/file names cannot corrupt the JSON document.
    if jq \
      --arg host "$host" \
      --arg file "$config_backup" \
      --argjson size_bytes "$size" \
      --argjson is_valid_gzip "$is_valid_gzip" \
      --argjson extraction_test "$extraction_test" \
      --arg verification_time "$(date -Iseconds)" \
      '.configs += [{
        host: $host,
        file: $file,
        size_bytes: $size_bytes,
        is_valid_gzip: $is_valid_gzip,
        extraction_test: $extraction_test,
        verification_time: $verification_time
      }]' "$verification_results" > "${verification_results}.tmp"; then
      mv "${verification_results}.tmp" "$verification_results"
    else
      log_warn "Could not record verification result for $host in $verification_results"
    fi

    # Cleanup test extraction
    rm -rf "$test_extract_dir" 2>/dev/null || true
  done

  log_success "Configuration backup verification completed"
  return 0
}
# Print the number of non-empty lines in file $1; prints 0 when the file is
# missing or unreadable.
_docker_state_line_count() {
  local list_file=$1
  local line_total=0

  if [[ -f "$list_file" ]]; then
    # grep -c prints "0" itself when nothing matches (and exits 1), so the
    # fallback must only absorb the status, not print another "0".
    line_total=$(grep -c . "$list_file" 2>/dev/null || true)
  fi
  printf '%s\n' "${line_total:-0}"
}

#######################################
# Summarise the Docker state captured for each host in a snapshot.
#
# Every sub-directory of the snapshot except database_dumps is treated as a
# host. Non-empty lines of docker_containers.txt, docker_images.txt,
# docker_networks.txt and docker_volumes.txt are counted, as are *.yml/*.yaml
# entries under compose_files/. Results are appended to
# $VERIFICATION_DIR/docker_verification.json.
#
# Globals:   VERIFICATION_DIR (read)
# Arguments: $1 - snapshot directory
# Outputs:   progress through the log_* helpers; JSON results file
# Returns:   0 always
#######################################
verify_docker_state_backups() {
  local snapshot_dir=$1
  local verification_results="$VERIFICATION_DIR/docker_verification.json"
  local host_dir host
  local container_count image_count network_count volume_count compose_files

  log_step "Verifying Docker state backups..."
  echo '{"hosts": []}' > "$verification_results"

  for host_dir in "$snapshot_dir"/*; do
    [[ -d "$host_dir" ]] || continue
    host=${host_dir##*/}
    # database_dumps is handled by the database verification step.
    [[ "$host" != "database_dumps" ]] || continue

    log_info "Verifying Docker state for $host..."

    container_count=$(_docker_state_line_count "$host_dir/docker_containers.txt")
    image_count=$(_docker_state_line_count "$host_dir/docker_images.txt")
    network_count=$(_docker_state_line_count "$host_dir/docker_networks.txt")
    volume_count=$(_docker_state_line_count "$host_dir/docker_volumes.txt")

    # Check for compose files
    compose_files=0
    if [[ -d "$host_dir/compose_files" ]]; then
      compose_files=$(find "$host_dir/compose_files" \
        \( -name "*.yml" -o -name "*.yaml" \) | wc -l)
      # Arithmetic expansion drops the padding some wc versions add.
      compose_files=$((compose_files))
    fi

    # Values travel as jq variables so an unusual directory name cannot
    # corrupt the JSON document.
    if jq \
      --arg host "$host" \
      --argjson containers "$container_count" \
      --argjson images "$image_count" \
      --argjson networks "$network_count" \
      --argjson volumes "$volume_count" \
      --argjson compose_files "$compose_files" \
      --arg verification_time "$(date -Iseconds)" \
      '.hosts += [{
        host: $host,
        containers: $containers,
        images: $images,
        networks: $networks,
        volumes: $volumes,
        compose_files: $compose_files,
        verification_time: $verification_time
      }]' "$verification_results" > "${verification_results}.tmp"; then
      mv "${verification_results}.tmp" "$verification_results"
    else
      log_warn "Could not record Docker state for $host in $verification_results"
    fi

    log_success "✅ Docker state verified for $host: $container_count containers, $image_count images"
  done

  log_success "Docker state verification completed"
  return 0
}
#######################################
# Build a Markdown report from the JSON result files in VERIFICATION_DIR.
#
# A section is filled in only for result files that exist. Callers capture
# this function's stdout to learn the report location, so stdout carries the
# report path and nothing else; log calls are redirected to stderr.
#
# Globals:   VERIFICATION_DIR (read), VERIFICATION_LOG (read)
# Arguments: $1 - snapshot directory (only quoted in the report header)
# Outputs:   stdout - path of the generated report
#            stderr - log messages
# Returns:   status of the final echo (0 in practice)
#######################################
create_verification_report() {
  local snapshot_dir=$1
  local db_json="$VERIFICATION_DIR/database_verification.json"
  local config_json="$VERIFICATION_DIR/config_verification.json"
  local docker_json="$VERIFICATION_DIR/docker_verification.json"
  local stamp report_file
  local total_db_dumps successful_restores failed_restores
  local total_configs valid_configs invalid_configs
  local total_hosts total_containers
  local critical_issues=0

  stamp=$(date +%Y%m%d_%H%M%S)
  report_file="$VERIFICATION_DIR/verification_report_${stamp}.md"

  log_step "Creating comprehensive verification report..." >&2

  cat > "$report_file" << EOF
# Backup Verification Report
**Generated:** $(date)
**Snapshot Directory:** $snapshot_dir
**Verification Directory:** $VERIFICATION_DIR
## Executive Summary
EOF

  # Database verification summary
  if [[ -f "$db_json" ]]; then
    total_db_dumps=$(jq '.dumps | length' "$db_json")
    successful_restores=$(jq '.dumps | map(select(.restore_test == true)) | length' "$db_json")
    echo "- **Database Dumps:** $total_db_dumps total, $successful_restores passed restore tests" >> "$report_file"
  fi

  # Configuration verification summary
  if [[ -f "$config_json" ]]; then
    total_configs=$(jq '.configs | length' "$config_json")
    valid_configs=$(jq '.configs | map(select(.is_valid_gzip == true and .extraction_test == true)) | length' "$config_json")
    echo "- **Configuration Backups:** $total_configs total, $valid_configs verified" >> "$report_file"
  fi

  # Docker verification summary
  if [[ -f "$docker_json" ]]; then
    total_hosts=$(jq '.hosts | length' "$docker_json")
    # "add" yields null for an empty list; fall back to 0 for the report.
    total_containers=$(jq '.hosts | map(.containers) | add // 0' "$docker_json")
    echo "- **Docker States:** $total_hosts hosts, $total_containers total containers" >> "$report_file"
  fi

  cat >> "$report_file" << EOF
## Detailed Results
### Database Verification
EOF

  # Database details
  if [[ -f "$db_json" ]]; then
    jq -r '.dumps[] | "- **\(.host)**: \(.size_bytes) bytes, \(.table_count) tables, restore test: \(.restore_test)"' "$db_json" >> "$report_file"
  fi

  cat >> "$report_file" << EOF
### Configuration Verification
EOF

  # Configuration details
  if [[ -f "$config_json" ]]; then
    jq -r '.configs[] | "- **\(.host)**: \(.size_bytes) bytes, valid: \(.is_valid_gzip), extractable: \(.extraction_test)"' "$config_json" >> "$report_file"
  fi

  cat >> "$report_file" << EOF
### Docker State Verification
EOF

  # Docker details
  if [[ -f "$docker_json" ]]; then
    jq -r '.hosts[] | "- **\(.host)**: \(.containers) containers, \(.images) images, \(.compose_files) compose files"' "$docker_json" >> "$report_file"
  fi

  cat >> "$report_file" << EOF
## Recommendations
### Critical Issues
EOF

  # Identify critical issues.
  # The counter uses $(( )) assignment rather than (( x++ )): the latter
  # returns status 1 when the old value is 0, which aborts under set -e.
  if [[ -f "$db_json" ]]; then
    failed_restores=$(jq '.dumps | map(select(.restore_test == false)) | length' "$db_json") \
      || failed_restores=0
    if [[ ${failed_restores:-0} -gt 0 ]]; then
      echo "- ❌ **$failed_restores database dumps failed restore tests** - Re-create these backups" >> "$report_file"
      critical_issues=$((critical_issues + 1))
    fi
  fi

  if [[ -f "$config_json" ]]; then
    invalid_configs=$(jq '.configs | map(select(.is_valid_gzip == false or .extraction_test == false)) | length' "$config_json") \
      || invalid_configs=0
    if [[ ${invalid_configs:-0} -gt 0 ]]; then
      echo "- ❌ **$invalid_configs configuration backups are corrupted** - Re-create these backups" >> "$report_file"
      critical_issues=$((critical_issues + 1))
    fi
  fi

  if ((critical_issues == 0)); then
    echo "- ✅ **No critical issues identified** - All backups are valid and restorable" >> "$report_file"
  fi

  cat >> "$report_file" << EOF
### Next Steps
1. **Address Critical Issues:** Fix any failed backups before proceeding
2. **Test Full Restoration:** Perform end-to-end restoration test in staging
3. **Document Procedures:** Update restoration procedures based on findings
4. **Schedule Regular Verification:** Implement automated backup verification
## Files and Logs
- **Verification Log:** $VERIFICATION_LOG
- **Database Results:** $VERIFICATION_DIR/database_verification.json
- **Config Results:** $VERIFICATION_DIR/config_verification.json
- **Docker Results:** $VERIFICATION_DIR/docker_verification.json
EOF

  log_success "Verification report created: $report_file" >&2
  echo "$report_file"
}
#######################################
# Run every verification stage against one snapshot and print a summary.
#
# Stages run in order: database dumps, configuration backups, Docker state,
# then report generation. A stage that returns non-zero stops the run.
# register_cleanup, register_rollback, validate_prerequisites and
# create_checkpoint are defined outside this file; their exact behaviour is
# assumed from their names (TODO confirm against the error handling library).
#
# Globals:   BACKUP_BASE_DIR, VERIFICATION_DIR, TEST_RESTORE_DIR (read)
# Arguments: $1 - snapshot directory (default: $BACKUP_BASE_DIR/latest)
# Outputs:   progress through the log_* helpers; summary on stdout
# Returns:   1 if the snapshot is missing, the work directories cannot be
#            created, or a stage fails; 0 otherwise
#######################################
run_full_verification() {
  local snapshot_dir=${1:-"$BACKUP_BASE_DIR/latest"}
  local report_output report_file

  if [[ ! -d "$snapshot_dir" ]]; then
    log_error "Snapshot directory not found: $snapshot_dir"
    return 1
  fi

  log_step "Starting full backup verification for: $snapshot_dir"

  # Create verification directories; nothing below can work without them.
  if ! mkdir -p "$VERIFICATION_DIR" "$TEST_RESTORE_DIR"; then
    log_error "Could not create verification directories"
    return 1
  fi

  # Register cleanup and rollback
  register_cleanup cleanup_verification
  register_rollback rollback_verification

  # Validate prerequisites
  validate_prerequisites docker jq gzip tar

  # Create checkpoint
  create_checkpoint "verification_start"

  # Verify database dumps
  if verify_database_dumps "$snapshot_dir"; then
    create_checkpoint "database_verification_complete"
  else
    log_error "Database verification failed"
    return 1
  fi

  # Verify configuration backups
  if verify_configuration_backups "$snapshot_dir"; then
    create_checkpoint "config_verification_complete"
  else
    log_error "Configuration verification failed"
    return 1
  fi

  # Verify Docker state backups
  if verify_docker_state_backups "$snapshot_dir"; then
    create_checkpoint "docker_verification_complete"
  else
    log_error "Docker verification failed"
    return 1
  fi

  # Create comprehensive report.
  # The report path is the LAST line the generator prints. Anything printed
  # before it (for example log lines written to stdout) is replayed to the
  # console instead of being mistaken for part of the path.
  report_output=$(create_verification_report "$snapshot_dir")
  report_file=${report_output##*$'\n'}
  if [[ "$report_output" == *$'\n'* ]]; then
    printf '%s\n' "${report_output%$'\n'*}"
  fi

  # Final summary
  log_success "✅ Backup verification completed successfully!"
  log_info "📊 Verification report: $report_file"

  # Display summary
  if [[ -f "$report_file" ]]; then
    echo ""
    echo "=== VERIFICATION SUMMARY ==="
    head -20 "$report_file"
    echo ""
    echo "Full report available at: $report_file"
  fi
}
#######################################
# Script entry point: pick the snapshot to verify and run the verification.
#
# Globals:   BACKUP_BASE_DIR (read)
# Arguments: $1 - snapshot directory (optional). When omitted, the target of
#                 the "latest" symlink under BACKUP_BASE_DIR is used.
# Outputs:   usage help and a snapshot listing when no snapshot can be found
# Returns:   status of run_full_verification; exits the script with 1 when
#            no argument is given and there is no "latest" symlink
#######################################
main() {
  local snapshot_dir="${1:-}"
  local latest_link="$BACKUP_BASE_DIR/latest"

  if [[ -z "$snapshot_dir" ]]; then
    # Nothing to fall back on: explain the usage and stop.
    if [[ ! -L "$latest_link" ]]; then
      log_error "No snapshot directory specified and no 'latest' link found"
      log_info "Usage: $0 [snapshot_directory]"
      log_info "Available snapshots:"
      ls -la "$BACKUP_BASE_DIR"/snapshot_* 2>/dev/null || echo "No snapshots found"
      exit 1
    fi

    # Use latest snapshot if no directory specified
    snapshot_dir=$(readlink -f "$latest_link")
    log_info "Using latest snapshot: $snapshot_dir"
  fi

  run_full_verification "$snapshot_dir"
}
# Execute main function, forwarding all command-line arguments unchanged.
main "$@"