#!/bin/bash
# Service Migration Validator
# Ensures complete and accurate migration of ALL services with configuration modifications

# Import error handling library (provides log_*, register_*, create_checkpoint,
# validate_prerequisites -- all used below)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/lib/error_handling.sh"

# Configuration
readonly VALIDATION_DIR="/opt/migration/validation"
readonly SERVICE_CONFIGS_DIR="/opt/migration/configs/services"
readonly MIGRATION_MAPPINGS="/opt/migration/configs/service_mappings.yml"
readonly PRE_MIGRATION_SNAPSHOT="/opt/migration/backups/latest"

# Complete service inventory with all dependencies and configurations.
# Value format (comma-separated, colon-tagged fields):
#   config:<path>,data:<path>,secrets:<name>,networks:<zone>,dependencies:<svc>[,<svc>...]
# NOTE: the "dependencies" field may itself contain commas, so parsers must
# take field 5 *onwards* (cut -f5-), never field 5 alone.
declare -A SERVICE_INVENTORY=(
  # Storage and Database Services
  ["postgres"]="config:/var/lib/postgresql/data/postgresql.conf,data:/var/lib/postgresql/data,secrets:postgres_password,networks:data-zone,dependencies:none"
  ["redis"]="config:/data/redis.conf,data:/data,secrets:redis_password,networks:data-zone,dependencies:none"

  # Core Application Services
  ["traefik"]="config:/etc/traefik,data:/certificates,secrets:traefik_users,networks:public-zone,dependencies:none"
  ["immich-server"]="config:/usr/src/app/upload,data:/usr/src/app/upload,secrets:postgres_password,networks:dmz-zone,dependencies:postgres,redis"
  ["immich-microservices"]="config:shared,data:shared,secrets:postgres_password,networks:dmz-zone,dependencies:postgres,redis,immich-server"
  ["immich-machine-learning"]="config:/cache,data:/cache,secrets:none,networks:dmz-zone,dependencies:immich-server"
  ["immich-web"]="config:none,data:none,secrets:none,networks:dmz-zone,dependencies:immich-server"
  ["jellyfin"]="config:/config,data:/media,secrets:none,networks:dmz-zone,dependencies:none"
  ["homeassistant"]="config:/config,data:/config,secrets:none,networks:dmz-zone,dependencies:none"
  ["appflowy"]="config:/app/data,data:/app/data,secrets:postgres_password,networks:dmz-zone,dependencies:postgres"
  ["paperless-ngx"]="config:/usr/src/paperless/data,data:/usr/src/paperless/media,secrets:postgres_password,networks:dmz-zone,dependencies:postgres,redis"

  # Management and Monitoring Services
  ["portainer"]="config:/data,data:/data,secrets:none,networks:management-zone,dependencies:none"
  ["grafana"]="config:/etc/grafana,data:/var/lib/grafana,secrets:grafana_admin_password,networks:monitoring-zone,dependencies:none"
  ["prometheus"]="config:/etc/prometheus,data:/prometheus,secrets:none,networks:monitoring-zone,dependencies:none"
  ["alertmanager"]="config:/etc/alertmanager,data:/alertmanager,secrets:none,networks:monitoring-zone,dependencies:prometheus"
  ["cadvisor"]="config:none,data:none,secrets:none,networks:monitoring-zone,dependencies:none"
  ["node-exporter"]="config:none,data:none,secrets:none,networks:monitoring-zone,dependencies:none"

  # IoT and Automation Services
  ["esphome"]="config:/config,data:/config,secrets:none,networks:dmz-zone,dependencies:homeassistant"
  ["mqtt"]="config:/mosquitto/config,data:/mosquitto/data,secrets:none,networks:dmz-zone,dependencies:homeassistant"

  # Backup and Utility Services
  ["duplicati"]="config:/data,data:/backups,secrets:none,networks:internal-zone,dependencies:none"
  ["watchtower"]="config:none,data:none,secrets:none,networks:management-zone,dependencies:none"
)

# Service configuration transformations needed for new architecture
declare -A CONFIG_TRANSFORMATIONS=(
  ["all"]="docker_networks,security_zones,secrets_management,resource_limits,health_checks"
  ["postgres"]="replication_setup,connection_pooling,performance_tuning"
  ["redis"]="clustering,persistence_config,memory_optimization"
  ["traefik"]="network_segmentation,ssl_enhancement,middleware_updates"
  ["immich-server"]="database_connection_update,redis_connection_update,file_path_updates"
  ["jellyfin"]="gpu_passthrough,transcoding_optimization,network_updates"
  ["homeassistant"]="mqtt_integration,device_discovery,automation_updates"
  ["grafana"]="datasource_updates,dashboard_provisioning,authentication_updates"
  ["prometheus"]="target_updates,alert_rule_updates,storage_optimization"
)

#######################################
# Remove stale temporary comparison files (older than 60 minutes).
# Registered as the cleanup hook in main().
#######################################
cleanup_validation() {
  log_info "Cleaning up validation temporary files..."

  # Best-effort: deletion failures must not abort the run
  find /tmp -name "validation_*.tmp" -mmin +60 -delete 2>/dev/null || true
  find /tmp -name "service_*.tmp" -mmin +60 -delete 2>/dev/null || true

  log_info "Validation cleanup completed"
}

#######################################
# Stop/remove any validation_* containers, then run cleanup.
# Registered as the rollback hook in main().
#######################################
rollback_validation() {
  log_info "Rolling back validation processes..."

  # Stop any running validation containers (best-effort)
  docker ps --filter "name=validation_*" -q | xargs -r docker stop 2>/dev/null || true
  docker ps -a --filter "name=validation_*" -q | xargs -r docker rm 2>/dev/null || true

  cleanup_validation
  log_info "Validation rollback completed"
}

#######################################
# Write the full service-mapping YAML to $MIGRATION_MAPPINGS.
# The heredoc delimiter is quoted, so the YAML is emitted verbatim.
#######################################
create_service_mappings() {
  log_step "Creating comprehensive service mapping configuration..."

  mkdir -p "$(dirname "$MIGRATION_MAPPINGS")"

  cat > "$MIGRATION_MAPPINGS" << 'EOF'
# Service Migration Mappings
# Defines how services map from current to new architecture
version: "2.0"

# Global transformations applied to all services
global_transformations:
  networks:
    old_network: "default"
    new_networks:
      public: ["traefik"]
      dmz: ["immich-server", "immich-web", "immich-microservices", "immich-machine-learning", "jellyfin", "homeassistant", "appflowy", "paperless-ngx", "esphome", "mqtt"]
      internal: ["portainer", "duplicati", "watchtower"]
      data: ["postgres", "redis"]
      monitoring: ["grafana", "prometheus", "alertmanager", "cadvisor", "node-exporter"]
  security:
    secrets_migration:
      hardcoded_passwords: "docker_secrets"
      config_files: "docker_configs"
      certificates: "docker_secrets"
  resource_constraints:
    cpu_limits: true
    memory_limits: true
    restart_policies: true
    health_checks: true

# Service-specific mappings and transformations
services:
  postgres:
    current_location: "omv800:/var/lib/docker/volumes/postgres_data"
    new_location: "postgres-primary-data"
    transformations:
      - type: "replication_setup"
        config:
          primary_host: "omv800"
          replica_host: "fedora"
          streaming_replication: true
      - type: "connection_pooling"
        config:
          max_connections: 200
          shared_buffers: "2GB"
          effective_cache_size: "6GB"
      - type: "backup_configuration"
        config:
          wal_archiving: true
          point_in_time_recovery: true
    dependencies:
      before: []
      after: ["network_setup", "storage_setup"]

  redis:
    current_location: "omv800:/var/lib/docker/volumes/redis_data"
    new_location: "redis-primary-data"
    transformations:
      - type: "clustering_setup"
        config:
          cluster_mode: true
          replicas: 2
          sentinel: true
      - type: "persistence_config"
        config:
          save_frequency: "900 1"
          aof_enabled: true
          aof_fsync: "everysec"
    dependencies:
      before: []
      after: ["network_setup"]

  immich-server:
    current_location: "omv800:/var/lib/docker/volumes/immich_server_data"
    new_location: "immich-server-data"
    transformations:
      - type: "database_connection"
        config:
          old_host: "postgres"
          new_host: "postgres-primary"
          connection_string_update: true
      - type: "redis_connection"
        config:
          old_host: "redis"
          new_host: "redis-primary"
          session_storage_update: true
      - type: "storage_optimization"
        config:
          file_storage: "/usr/src/app/upload"
          thumbnail_storage: "/usr/src/app/upload/thumbs"
          nfs_optimization: true
    dependencies:
      before: ["postgres", "redis"]
      after: ["immich-microservices", "immich-machine-learning"]

  immich-microservices:
    current_location: "shared_with_server"
    new_location: "shared_with_server"
    transformations:
      - type: "job_queue_optimization"
        config:
          queue_backend: "redis"
          concurrent_jobs: 4
          memory_limit: "2GB"
    dependencies:
      before: ["immich-server", "redis"]
      after: []

  immich-machine-learning:
    current_location: "omv800:/var/lib/docker/volumes/immich_ml_cache"
    new_location: "immich-ml-cache"
    transformations:
      - type: "gpu_optimization"
        config:
          gpu_enabled: true
          gpu_device: "/dev/dri"
          tensorflow_optimization: true
      - type: "model_caching"
        config:
          cache_size: "10GB"
          model_download: true
    dependencies:
      before: ["immich-server"]
      after: []

  jellyfin:
    current_location: "omv800:/var/lib/docker/volumes/jellyfin_config"
    new_location: "jellyfin-config"
    transformations:
      - type: "hardware_transcoding"
        config:
          gpu_acceleration: true
          vaapi_device: "/dev/dri/renderD128"
          hardware_decoding: true
      - type: "media_optimization"
        config:
          media_path: "/media"
          transcoding_temp: "/tmp/jellyfin"
          cache_optimization: true
      - type: "network_optimization"
        config:
          local_network_subnets: ["192.168.50.0/24", "10.0.0.0/8"]
          external_access: true
    dependencies:
      before: []
      after: []

  homeassistant:
    current_location: "jonathan-2518f5u:/var/lib/docker/volumes/homeassistant_config"
    new_location: "homeassistant-config"
    transformations:
      - type: "device_discovery"
        config:
          mdns_enabled: true
          upnp_enabled: true
          network_discovery: ["192.168.50.0/24"]
      - type: "integration_updates"
        config:
          mqtt_broker: "mqtt-broker"
          database_url_update: true
          recorder_optimization: true
      - type: "automation_migration"
        config:
          yaml_validation: true
          deprecated_features: "update"
    dependencies:
      before: ["mqtt"]
      after: ["esphome"]

  traefik:
    current_location: "omv800:/var/lib/docker/volumes/traefik_data"
    new_location: "traefik-certificates"
    transformations:
      - type: "network_segmentation"
        config:
          frontend_network: "public-zone"
          backend_networks: ["dmz-zone", "internal-zone", "monitoring-zone"]
          network_isolation: true
      - type: "ssl_enhancement"
        config:
          tls_version_min: "1.2"
          cipher_suites: "secure_only"
          hsts_enabled: true
      - type: "middleware_updates"
        config:
          rate_limiting: true
          security_headers: true
          authentication: "secrets_based"
    dependencies:
      before: ["network_setup"]
      after: ["all_backend_services"]

  grafana:
    current_location: "audrey:/var/lib/docker/volumes/grafana_data"
    new_location: "grafana-data"
    transformations:
      - type: "datasource_provisioning"
        config:
          prometheus_url: "http://prometheus:9090"
          loki_url: "http://loki:3100"
          automatic_provisioning: true
      - type: "dashboard_migration"
        config:
          dashboard_provisioning: true
          plugin_updates: true
          theme_consistency: true
    dependencies:
      before: ["prometheus"]
      after: []

  prometheus:
    current_location: "audrey:/var/lib/docker/volumes/prometheus_data"
    new_location: "prometheus-data"
    transformations:
      - type: "target_discovery"
        config:
          service_discovery: "docker_swarm"
          static_targets: "update"
          scrape_interval_optimization: true
      - type: "storage_optimization"
        config:
          retention_time: "30d"
          storage_path: "/prometheus"
          wal_compression: true
    dependencies:
      before: []
      after: ["alertmanager", "grafana"]

# Migration phases and ordering
migration_phases:
  phase_1_infrastructure:
    order: 1
    services: ["network_setup", "storage_setup", "secrets_setup"]
    parallel: false
  phase_2_data_layer:
    order: 2
    services: ["postgres", "redis"]
    parallel: false
    validation_required: true
  phase_3_core_services:
    order: 3
    services: ["traefik"]
    parallel: false
    health_check_required: true
  phase_4_application_services:
    order: 4
    services: ["immich-server", "jellyfin", "homeassistant", "appflowy", "paperless-ngx"]
    parallel: true
    max_concurrent: 2
    validation_required: true
  phase_5_dependent_services:
    order: 5
    services: ["immich-microservices", "immich-machine-learning", "immich-web", "esphome", "mqtt"]
    parallel: true
    max_concurrent: 3
  phase_6_monitoring:
    order: 6
    services: ["prometheus", "alertmanager", "grafana", "cadvisor", "node-exporter"]
    parallel: true
    max_concurrent: 3
  phase_7_management:
    order: 7
    services: ["portainer", "duplicati", "watchtower"]
    parallel: true

# Validation requirements
validation_requirements:
  data_integrity:
    database_checksums: true
    file_checksums: true
    configuration_validation: true
  functionality_testing:
    health_endpoints: true
    authentication_testing: true
    integration_testing: true
    performance_baseline: true
  rollback_preparation:
    backup_verification: true
    rollback_scripts: true
    emergency_procedures: true
EOF

  log_success "Service mapping configuration created: $MIGRATION_MAPPINGS"
}

#######################################
# Validate every service in SERVICE_INVENTORY and write a JSON report
# to $VALIDATION_DIR/current_state_validation.json.
# Returns: 0 if all services validated, 1 if any validation errors.
#######################################
validate_current_state() {
  log_step "Validating current service state and configurations..."

  local validation_report="$VALIDATION_DIR/current_state_validation.json"
  mkdir -p "$VALIDATION_DIR"

  # Initialize validation report skeleton
  cat > "$validation_report" << 'EOF'
{
  "validation_timestamp": "",
  "total_services": 0,
  "services_validated": 0,
  "validation_errors": 0,
  "services": {},
  "missing_configurations": [],
  "missing_data": [],
  "dependency_issues": []
}
EOF

  # Update timestamp (jq cannot edit in place; write to .tmp then move)
  jq --arg timestamp "$(date -Iseconds)" '.validation_timestamp = $timestamp' \
    "$validation_report" > "${validation_report}.tmp" && mv "${validation_report}.tmp" "$validation_report"

  local services_validated=0
  local validation_errors=0

  # Validate each service in the inventory
  local service service_info validation_result service_status
  for service in "${!SERVICE_INVENTORY[@]}"; do
    log_info "Validating service: $service"

    service_info="${SERVICE_INVENTORY[$service]}"
    validation_result=$(validate_individual_service "$service" "$service_info")

    # Merge per-service result into the report
    jq --arg service "$service" \
      --argjson result "$validation_result" \
      '.services[$service] = $result' "$validation_report" > "${validation_report}.tmp" \
      && mv "${validation_report}.tmp" "$validation_report"

    # Tally the result
    service_status=$(echo "$validation_result" | jq -r '.status')
    if [[ "$service_status" == "valid" ]]; then
      # Plain arithmetic assignment: ((var++)) returns 1 when var is 0,
      # which would abort under 'set -e' (possibly enabled by the sourced lib)
      services_validated=$((services_validated + 1))
    else
      validation_errors=$((validation_errors + 1))
      log_warn "Validation issues found for $service"
    fi
  done

  # Update final statistics
  jq --argjson total "${#SERVICE_INVENTORY[@]}" \
    --argjson validated "$services_validated" \
    --argjson errors "$validation_errors" \
    '.total_services = $total | .services_validated = $validated | .validation_errors = $errors' \
    "$validation_report" > "${validation_report}.tmp" && mv "${validation_report}.tmp" "$validation_report"

  log_success "Current state validation completed: $services_validated/${#SERVICE_INVENTORY[@]} services validated"

  if [[ $validation_errors -gt 0 ]]; then
    log_error "$validation_errors validation errors found - check report: $validation_report"
    return 1
  fi

  return 0
}

#######################################
# Validate a single service: locate its host/container, check config and
# data paths, health status, and dependencies.
# Arguments: $1 - service name; $2 - inventory value (see SERVICE_INVENTORY)
# Outputs:   JSON validation result on stdout
#######################################
validate_individual_service() {
  local service_name=$1
  local service_info=$2

  # Parse service information. Fields 1-4 are single values; the
  # dependencies field may contain commas, so take field 5 ONWARDS
  # (the original 'cut -f5' silently dropped every dependency after
  # the first, e.g. "postgres,redis" -> "postgres").
  local config_path data_path secrets networks dependencies
  config_path=$(echo "$service_info" | cut -d',' -f1 | cut -d':' -f2-)
  data_path=$(echo "$service_info" | cut -d',' -f2 | cut -d':' -f2-)
  secrets=$(echo "$service_info" | cut -d',' -f3 | cut -d':' -f2-)
  networks=$(echo "$service_info" | cut -d',' -f4 | cut -d':' -f2-)
  dependencies=$(echo "$service_info" | cut -d',' -f5- | cut -d':' -f2-)

  local validation_issues=()
  local validation_status="valid"

  # Find the host where this service is currently running.
  # NOTE(review): grep -i matches substrings, so "redis" also matches
  # "redis-commander" etc.; first match wins.
  local service_host=""
  local service_container=""
  local host
  for host in omv800 fedora surface jonathan-2518f5u audrey; do
    local container_check
    container_check=$(ssh -o ConnectTimeout=10 "$host" \
      "docker ps --format '{{.Names}}' | grep -i '$service_name'" 2>/dev/null | head -1 || echo "")
    if [[ -n "$container_check" ]]; then
      service_host="$host"
      service_container="$container_check"
      break
    fi
  done

  if [[ -z "$service_host" ]]; then
    # Service not found anywhere: only core services are required to exist
    case "$service_name" in
      "postgres"|"redis"|"traefik"|"immich-server"|"jellyfin"|"homeassistant"|"grafana"|"prometheus")
        validation_issues+=("Service $service_name not found on any host")
        validation_status="missing"
        ;;
      *)
        # May be a new service or microservice introduced by the new architecture
        log_debug "Service $service_name not found - may be new or decomposed service"
        ;;
    esac
  else
    # Validate service configuration path (on host OR inside the container)
    if [[ "$config_path" != "none" ]] && [[ "$config_path" != "shared" ]]; then
      if ! ssh "$service_host" "test -d '$config_path' || docker exec $service_container test -d '$config_path'" 2>/dev/null; then
        validation_issues+=("Configuration path not found: $config_path")
        validation_status="config_missing"
      fi
    fi

    # Validate service data path
    if [[ "$data_path" != "none" ]] && [[ "$data_path" != "shared" ]]; then
      if ! ssh "$service_host" "test -d '$data_path' || docker exec $service_container test -d '$data_path'" 2>/dev/null; then
        validation_issues+=("Data path not found: $data_path")
        validation_status="data_missing"
      fi
    fi

    # Validate service health (containers without a healthcheck report "unknown")
    local health_status="unknown"
    if [[ -n "$service_container" ]]; then
      health_status=$(ssh "$service_host" \
        "docker inspect --format='{{.State.Health.Status}}' $service_container" 2>/dev/null || echo "unknown")
      if [[ "$health_status" == "unhealthy" ]]; then
        validation_issues+=("Service health check failing")
        validation_status="unhealthy"
      fi
    fi
  fi

  # Validate dependencies: each must be running somewhere in the fleet
  if [[ "$dependencies" != "none" ]]; then
    local dep
    IFS=',' read -ra DEPS <<< "$dependencies"
    for dep in "${DEPS[@]}"; do
      local dep_found=false
      for host in omv800 fedora surface jonathan-2518f5u audrey; do
        if ssh -o ConnectTimeout=10 "$host" "docker ps --format '{{.Names}}' | grep -i '$dep'" >/dev/null 2>&1; then
          dep_found=true
          break
        fi
      done
      if [[ "$dep_found" == false ]]; then
        validation_issues+=("Dependency not found: $dep")
        # Don't overwrite a more specific failure status
        if [[ "$validation_status" == "valid" ]]; then
          validation_status="dependency_missing"
        fi
      fi
    done
  fi

  # Create validation result JSON (issues list only when non-empty)
  local issues_json="[]"
  if [[ ${#validation_issues[@]} -gt 0 ]]; then
    issues_json=$(printf '%s\n' "${validation_issues[@]}" | jq -R . | jq -s .)
  fi

  cat << EOF
{
  "service": "$service_name",
  "status": "$validation_status",
  "current_host": "$service_host",
  "current_container": "$service_container",
  "config_path": "$config_path",
  "data_path": "$data_path",
  "secrets": "$secrets",
  "networks": "$networks",
  "dependencies": "$dependencies",
  "validation_issues": $issues_json,
  "validated_at": "$(date -Iseconds)"
}
EOF
}

#######################################
# Generate per-service transformation scripts under
# $SERVICE_CONFIGS_DIR/transformations (PostgreSQL, Immich, Jellyfin).
# Outer heredoc delimiters are quoted so inner scripts are written verbatim.
#######################################
create_migration_transformations() {
  log_step "Creating service migration transformation scripts..."

  local transformations_dir="$SERVICE_CONFIGS_DIR/transformations"
  mkdir -p "$transformations_dir"

  # Create transformation for PostgreSQL clustering
  cat > "$transformations_dir/postgres_transformation.sh" << 'EOF'
#!/bin/bash
# PostgreSQL Migration Transformation
set -euo pipefail
source "$(dirname "$0")/../../../lib/error_handling.sh"

log_step "Transforming PostgreSQL configuration for clustering..."

# Create new PostgreSQL configuration with clustering
cat > /tmp/postgresql.conf << 'PG_EOF'
# PostgreSQL Configuration for Docker Swarm Clustering
listen_addresses = '*'
port = 5432

# Memory settings
shared_buffers = 2GB
effective_cache_size = 6GB
maintenance_work_mem = 512MB
work_mem = 32MB

# WAL settings for replication
wal_level = replica
archive_mode = on
archive_command = 'test ! -f /var/lib/postgresql/wal_archive/%f && cp %p /var/lib/postgresql/wal_archive/%f'
max_wal_senders = 3
# NOTE(review): wal_keep_segments was removed in PostgreSQL 13 (replaced by
# wal_keep_size) - confirm target server version before deploying
wal_keep_segments = 64

# Replication settings
hot_standby = on
hot_standby_feedback = on

# Connection settings
max_connections = 200
shared_preload_libraries = 'pg_stat_statements'

# Logging
log_destination = 'stderr'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_statement = 'mod'
log_min_duration_statement = 1000

# Performance tuning
checkpoint_completion_target = 0.9
wal_buffers = 64MB
default_statistics_target = 100
random_page_cost = 1.1
effective_io_concurrency = 200
PG_EOF

# Create replication user setup
cat > /tmp/setup_replication.sql << 'SQL_EOF'
-- NOTE(review): placeholder passwords below must be replaced with values
-- from the secrets store before execution.
-- Create replication user
CREATE USER replicator WITH REPLICATION ENCRYPTED PASSWORD 'replication_password_here';

-- Create monitoring user. The user may not be named "pg_monitor":
-- that is a reserved built-in role since PostgreSQL 10, so CREATE USER
-- would fail (and the original self-grant was a no-op error).
CREATE USER monitor_user WITH PASSWORD 'monitoring_password_here';
GRANT pg_monitor TO monitor_user;

-- Optimize for performance
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
CREATE EXTENSION IF NOT EXISTS pg_buffercache;
SQL_EOF

log_success "PostgreSQL transformation configuration created"
EOF

  # Create transformation for Immich service updates
  cat > "$transformations_dir/immich_transformation.sh" << 'EOF'
#!/bin/bash
# Immich Migration Transformation
set -euo pipefail
source "$(dirname "$0")/../../../lib/error_handling.sh"

log_step "Transforming Immich configuration for new architecture..."

# Update database connection configuration
cat > /tmp/immich_database_config.json << 'JSON_EOF'
{
  "database": {
    "host": "postgres-primary",
    "port": 5432,
    "username": "immich",
    "database": "immich",
    "passwordFile": "/run/secrets/postgres_password"
  },
  "redis": {
    "host": "redis-primary",
    "port": 6379,
    "passwordFile": "/run/secrets/redis_password"
  },
  "storage": {
    "uploadPath": "/usr/src/app/upload",
    "thumbnailPath": "/usr/src/app/upload/thumbs",
    "profileImagePath": "/usr/src/app/upload/profile",
    "videoPath": "/usr/src/app/upload/videos"
  },
  "machinelearning": {
    "enabled": true,
    "url": "http://immich-machine-learning:3003",
    "clipModel": "ViT-B-32__openai"
  }
}
JSON_EOF

# Create optimized Docker Compose configuration for Immich
cat > /tmp/immich_compose_optimized.yml << 'COMPOSE_EOF'
version: '3.8'

services:
  immich-server:
    image: ghcr.io/immich-app/immich-server:release
    environment:
      - DB_HOSTNAME=postgres-primary
      - DB_USERNAME=immich
      - DB_PASSWORD_FILE=/run/secrets/postgres_password
      - DB_DATABASE_NAME=immich
      - REDIS_HOSTNAME=redis-primary
      - REDIS_PASSWORD_FILE=/run/secrets/redis_password
      - UPLOAD_LOCATION=/usr/src/app/upload
    volumes:
      - immich-upload-data:/usr/src/app/upload
      - /etc/localtime:/etc/localtime:ro
    secrets:
      - postgres_password
      - redis_password
    networks:
      - dmz-zone
      - data-zone
    deploy:
      replicas: 2
      placement:
        constraints:
          - node.labels.role.storage == true
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.immich.rule=Host(`immich.${DOMAIN}`)"
        - "traefik.http.routers.immich.entrypoints=websecure"
        - "traefik.http.routers.immich.tls.certresolver=letsencrypt"
        - "traefik.http.services.immich.loadbalancer.server.port=3001"

  immich-microservices:
    image: ghcr.io/immich-app/immich-server:release
    command: ['start.sh', 'microservices']
    environment:
      - DB_HOSTNAME=postgres-primary
      - DB_USERNAME=immich
      - DB_PASSWORD_FILE=/run/secrets/postgres_password
      - DB_DATABASE_NAME=immich
      - REDIS_HOSTNAME=redis-primary
      - REDIS_PASSWORD_FILE=/run/secrets/redis_password
    volumes:
      - immich-upload-data:/usr/src/app/upload
    secrets:
      - postgres_password
      - redis_password
    networks:
      - dmz-zone
      - data-zone
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.labels.role.compute == true
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'

  immich-machine-learning:
    image: ghcr.io/immich-app/immich-machine-learning:release
    environment:
      - MACHINE_LEARNING_CACHE_FOLDER=/cache
    volumes:
      - immich-model-cache:/cache
    networks:
      - dmz-zone
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.labels.role.compute == true
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
        reservations:
          memory: 2G
          cpus: '1.0'

  immich-web:
    image: ghcr.io/immich-app/immich-web:release
    environment:
      - IMMICH_SERVER_URL=http://immich-server:3001
    networks:
      - dmz-zone
    deploy:
      replicas: 2
      placement:
        preferences:
          - spread: node.labels.zone
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.25'

volumes:
  immich-upload-data:
    driver: local
    driver_opts:
      type: nfs
      o: addr=omv800,rw
      device: ":/mnt/storage/immich"
  immich-model-cache:
    driver: local

networks:
  dmz-zone:
    external: true
  data-zone:
    external: true

secrets:
  postgres_password:
    external: true
  redis_password:
    external: true
COMPOSE_EOF

log_success "Immich transformation configuration created"
EOF

  # Create transformation for Jellyfin GPU optimization
  cat > "$transformations_dir/jellyfin_transformation.sh" << 'EOF'
#!/bin/bash
# Jellyfin Migration Transformation with GPU optimization
set -euo pipefail
source "$(dirname "$0")/../../../lib/error_handling.sh"

log_step "Transforming Jellyfin configuration for GPU acceleration..."

# Create GPU-optimized Jellyfin configuration
cat > /tmp/jellyfin_compose_gpu.yml << 'COMPOSE_EOF'
version: '3.8'

services:
  jellyfin:
    image: jellyfin/jellyfin:latest
    environment:
      - JELLYFIN_PublishedServerUrl=https://jellyfin.${DOMAIN}
      - TZ=America/New_York
    volumes:
      - jellyfin-config:/config
      - jellyfin-cache:/cache
      - /mnt/media:/media:ro
      - /dev/dri:/dev/dri
    devices:
      - /dev/dri/renderD128:/dev/dri/renderD128
    networks:
      - dmz-zone
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.labels.role.storage == true
          - node.labels.gpu.available == true
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
        reservations:
          memory: 2G
          cpus: '1.0'
          # GPU reservation would be here in newer Compose versions
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.jellyfin.rule=Host(`jellyfin.${DOMAIN}`)"
        - "traefik.http.routers.jellyfin.entrypoints=websecure"
        - "traefik.http.routers.jellyfin.tls.certresolver=letsencrypt"
        - "traefik.http.services.jellyfin.loadbalancer.server.port=8096"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8096/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

volumes:
  jellyfin-config:
    driver: local
    driver_opts:
      type: nfs
      o: addr=omv800,rw
      device: ":/mnt/storage/jellyfin/config"
  jellyfin-cache:
    driver: local

networks:
  dmz-zone:
    external: true
COMPOSE_EOF

# Create Jellyfin system.xml with GPU acceleration.
# NOTE(review): the original XML element names were lost; names below are
# reconstructed from Jellyfin's encoding options and MUST be confirmed
# against the target Jellyfin version before use. Original values (in
# order): true, vaapi, /dev/dri/renderD128, true, true, true, 4,
# /cache/transcoding-temp, true, 200, 25.
cat > /tmp/jellyfin_system.xml << 'XML_EOF'
<?xml version="1.0" encoding="utf-8"?>
<EncodingOptions>
  <EnableHardwareEncoding>true</EnableHardwareEncoding>
  <HardwareAccelerationType>vaapi</HardwareAccelerationType>
  <VaapiDevice>/dev/dri/renderD128</VaapiDevice>
  <EnableDecodingColorDepth10Hevc>true</EnableDecodingColorDepth10Hevc>
  <EnableTonemapping>true</EnableTonemapping>
  <EnableThrottling>true</EnableThrottling>
  <EncodingThreadCount>4</EncodingThreadCount>
  <TranscodingTempPath>/cache/transcoding-temp</TranscodingTempPath>
  <DeinterlaceDoubleRate>true</DeinterlaceDoubleRate>
  <ThrottleDelaySeconds>200</ThrottleDelaySeconds>
  <H264Crf>25</H264Crf>
</EncodingOptions>
XML_EOF

log_success "Jellyfin GPU transformation configuration created"
EOF

  chmod +x "$transformations_dir"/*.sh

  log_success "Migration transformation scripts created in $transformations_dir"
}

#######################################
# Validate that each CONFIG_TRANSFORMATIONS entry's prerequisites exist
# (networks, secrets, config dirs, GPU devices) and write a JSON report.
# Returns: always 0 (errors are reported, not fatal).
#######################################
validate_migration_transformations() {
  log_step "Validating migration transformations..."

  local transformation_report="$VALIDATION_DIR/transformation_validation.json"

  # Initialize transformation validation report
  cat > "$transformation_report" << 'EOF'
{
  "validation_timestamp": "",
  "transformations_validated": 0,
  "transformation_errors": 0,
  "transformations": {}
}
EOF

  jq --arg timestamp "$(date -Iseconds)" '.validation_timestamp = $timestamp' \
    "$transformation_report" > "${transformation_report}.tmp" && mv "${transformation_report}.tmp" "$transformation_report"

  local transformations_validated=0
  local transformation_errors=0

  local service transformations transformation
  for service in "${!CONFIG_TRANSFORMATIONS[@]}"; do
    transformations="${CONFIG_TRANSFORMATIONS[$service]}"
    log_info "Validating transformations for: $service"

    IFS=',' read -ra TRANSFORM_ARRAY <<< "$transformations"
    local service_validation="{\"service\": \"$service\", \"transformations\": [], \"status\": \"valid\"}"

    for transformation in "${TRANSFORM_ARRAY[@]}"; do
      local transform_valid=true
      local transform_issues=()

      # Only a subset of transformation types have automated checks;
      # unknown types pass through as valid.
      case "$transformation" in
        "docker_networks")
          # Check if network segmentation is properly configured
          if ! docker network ls | grep -q "dmz-zone"; then
            transform_valid=false
            transform_issues+=("Network segmentation not configured")
          fi
          ;;
        "security_zones")
          # Check if security zones are defined
          if [[ ! -f "$SERVICE_CONFIGS_DIR/security/zones.yml" ]]; then
            transform_valid=false
            transform_issues+=("Security zones configuration missing")
          fi
          ;;
        "secrets_management")
          # Check if Docker secrets are configured
          if ! docker secret ls | grep -q "postgres_password"; then
            transform_valid=false
            transform_issues+=("Docker secrets not configured")
          fi
          ;;
        "resource_limits")
          # Check if resource limit templates exist
          if [[ ! -d "$SERVICE_CONFIGS_DIR/resources" ]]; then
            transform_valid=false
            transform_issues+=("Resource limit configurations missing")
          fi
          ;;
        "gpu_passthrough")
          # Check if GPU devices are available
          if [[ ! -d "/dev/dri" ]]; then
            transform_valid=false
            transform_issues+=("GPU devices not available")
          fi
          ;;
      esac

      # Build the issues JSON. Guard the empty case explicitly: piping an
      # empty array through printf|jq produced [""] instead of [].
      local issues_json="[]"
      if [[ ${#transform_issues[@]} -gt 0 ]]; then
        issues_json=$(printf '%s\n' "${transform_issues[@]}" | jq -R . | jq -s . 2>/dev/null || echo '[]')
      fi
      local transform_result="{\"name\": \"$transformation\", \"valid\": $transform_valid, \"issues\": $issues_json}"
      service_validation=$(echo "$service_validation" | jq --argjson transform "$transform_result" '.transformations += [$transform]')

      if [[ "$transform_valid" == false ]]; then
        service_validation=$(echo "$service_validation" | jq '.status = "invalid"')
        transformation_errors=$((transformation_errors + 1))
      else
        transformations_validated=$((transformations_validated + 1))
      fi
    done

    # Merge per-service validation into the report
    jq --arg service "$service" \
      --argjson validation "$service_validation" \
      '.transformations[$service] = $validation' "$transformation_report" > "${transformation_report}.tmp" \
      && mv "${transformation_report}.tmp" "$transformation_report"
  done

  # Update final statistics
  jq --argjson validated "$transformations_validated" \
    --argjson errors "$transformation_errors" \
    '.transformations_validated = $validated | .transformation_errors = $errors' \
    "$transformation_report" > "${transformation_report}.tmp" && mv "${transformation_report}.tmp" "$transformation_report"

  log_success "Transformation validation completed: $transformations_validated validated, $transformation_errors errors"

  if [[ $transformation_errors -gt 0 ]]; then
    log_warn "Transformation validation found $transformation_errors errors - check report: $transformation_report"
  fi

  return 0
}

#######################################
# Write the phased migration execution plan (JSON) to
# $VALIDATION_DIR/migration_execution_plan.json.
#######################################
create_comprehensive_migration_plan() {
  log_step "Creating comprehensive migration execution plan..."

  local migration_plan="$VALIDATION_DIR/migration_execution_plan.json"

  # Create detailed migration plan skeleton
  cat > "$migration_plan" << 'EOF'
{
  "plan_version": "2.0",
  "created_at": "",
  "estimated_duration_hours": 8,
  "rollback_points": 6,
  "phases": [],
  "prerequisites": [],
  "validation_checkpoints": [],
  "rollback_procedures": {}
}
EOF

  jq --arg timestamp "$(date -Iseconds)" '.created_at = $timestamp' \
    "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"

  # Add prerequisites
  local prerequisites='[
    "All hosts accessible via SSH",
    "Docker Swarm cluster initialized",
    "Network segmentation configured",
    "Secrets management deployed",
    "Backup verification completed",
    "Monitoring systems operational"
  ]'
  jq --argjson prereqs "$prerequisites" '.prerequisites = $prereqs' \
    "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"

  # Add detailed migration phases
  local phases='[
    {
      "phase": 1,
      "name": "Infrastructure Preparation",
      "duration_hours": 1,
      "services": ["network_zones", "storage_volumes", "secrets_deployment"],
      "parallel": false,
      "rollback_point": true,
      "validation_required": true
    },
    {
      "phase": 2,
      "name": "Data Layer Migration",
      "duration_hours": 2,
      "services": ["postgres", "redis"],
      "parallel": false,
      "rollback_point": true,
      "validation_required": true,
      "critical": true
    },
    {
      "phase": 3,
      "name": "Core Services Migration",
      "duration_hours": 1,
      "services": ["traefik"],
      "parallel": false,
      "rollback_point": true,
      "validation_required": true,
      "critical": true
    },
    {
      "phase": 4,
      "name": "Application Services Migration",
      "duration_hours": 3,
      "services": ["immich-server", "jellyfin", "homeassistant", "appflowy", "paperless-ngx"],
      "parallel": true,
      "max_concurrent": 2,
      "rollback_point": true,
      "validation_required": true
    },
    {
      "phase": 5,
      "name": "Supporting Services Migration",
      "duration_hours": 1,
      "services": ["immich-microservices", "immich-machine-learning", "immich-web", "esphome", "mqtt"],
      "parallel": true,
      "max_concurrent": 3,
      "rollback_point": true,
      "validation_required": true
    },
    {
      "phase": 6,
      "name": "Monitoring and Management",
      "duration_hours": 1,
      "services": ["prometheus", "alertmanager", "grafana", "portainer"],
      "parallel": true,
      "rollback_point": true,
      "validation_required": false
    }
  ]'
  jq --argjson phases "$phases" '.phases = $phases' \
    "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"

  # Add validation checkpoints
  local checkpoints='[
    {
      "checkpoint": 1,
      "phase": "Infrastructure Preparation",
      "validations": [
        "Docker networks created",
        "Storage volumes accessible",
        "Secrets properly deployed",
        "Security zones functional"
      ]
    },
    {
      "checkpoint": 2,
      "phase": "Data Layer Migration",
      "validations": [
        "PostgreSQL replication working",
        "Redis clustering functional",
        "Database data integrity verified",
        "Connection pooling operational"
      ]
    },
    {
      "checkpoint": 3,
      "phase": "Core Services Migration",
      "validations": [
        "Traefik SSL certificates valid",
        "Load balancing functional",
        "Security middleware active",
        "Health checks passing"
      ]
    },
    {
      "checkpoint": 4,
      "phase": "Application Services Migration",
      "validations": [
        "All services responding to health checks",
        "Database connections established",
        "File storage accessible",
        "Authentication working"
      ]
    },
    {
      "checkpoint": 5,
      "phase": "Supporting Services Migration",
      "validations": [
        "Microservices communication working",
        "Queue processing functional",
        "Integration endpoints responding",
        "Real-time features operational"
      ]
    },
    {
      "checkpoint": 6,
      "phase": "Final Validation",
      "validations": [
        "All services healthy",
        "Performance metrics acceptable",
        "Security scans passed",
        "User acceptance tests completed"
      ]
    }
  ]'
  jq --argjson checkpoints "$checkpoints" '.validation_checkpoints = $checkpoints' \
    "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"

  log_success "Comprehensive migration plan created: $migration_plan"
}

# Main execution function
#######################################
# Entry point: dispatch on the requested action.
# Arguments: $1 - action (validate|current-state|transformations|plan|help),
#            defaults to "validate".
#######################################
main() {
  local action=${1:-"validate"}

  # Register cleanup and rollback functions (from sourced error_handling.sh)
  register_cleanup cleanup_validation
  register_rollback rollback_validation

  # Quote the selector so an action containing whitespace cannot word-split
  case "$action" in
    "validate")
      log_step "Starting comprehensive service validation..."

      # Validate prerequisites (required external tools)
      validate_prerequisites ssh docker jq yq

      # Create service mappings
      create_service_mappings
      create_checkpoint "service_mappings_created"

      # Validate current state
      validate_current_state
      create_checkpoint "current_state_validated"

      # Create migration transformations
      create_migration_transformations
      create_checkpoint "transformations_created"

      # Validate transformations
      validate_migration_transformations
      create_checkpoint "transformations_validated"

      # Create comprehensive migration plan
      create_comprehensive_migration_plan
      create_checkpoint "migration_plan_created"

      log_success "βœ… Service migration validation completed!"
      log_info "πŸ“‹ Service mappings: $MIGRATION_MAPPINGS"
      log_info "πŸ“Š Validation results: $VALIDATION_DIR/"
      log_info "πŸ—ΊοΈ Migration plan: $VALIDATION_DIR/migration_execution_plan.json"
      ;;
    "current-state")
      validate_current_state
      ;;
    "transformations")
      create_migration_transformations
      validate_migration_transformations
      ;;
    "plan")
      create_comprehensive_migration_plan
      ;;
    "help"|*)
      cat << EOF
Service Migration Validator

Usage: $0 [action]

Actions:
  validate        - Complete validation and planning (default)
  current-state   - Only validate current service state
  transformations - Only create and validate transformations
  plan            - Only create migration execution plan
  help            - Show this help

Examples:
  $0 validate
  $0 current-state
  $0 transformations
EOF
      ;;
  esac
}

# Execute main function
main "$@"