Files
HomeAudit/migration_scripts/scripts/service_migration_validator.sh
2025-08-24 11:13:39 -04:00

1270 lines
42 KiB
Bash
Executable File

#!/bin/bash
# Service Migration Validator
# Ensures complete and accurate migration of ALL services with configuration modifications
# Import error handling library
# Resolve the directory containing this script so the library can be sourced
# regardless of the caller's current working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Provides log_step/log_info/log_warn/log_error/log_success/log_debug,
# register_cleanup, register_rollback, create_checkpoint and
# validate_prerequisites (assumed from usage below — confirm in lib).
source "$SCRIPT_DIR/lib/error_handling.sh"
# Configuration
readonly VALIDATION_DIR="/opt/migration/validation"                        # JSON validation reports land here
readonly SERVICE_CONFIGS_DIR="/opt/migration/configs/services"             # generated service configs/transformations
readonly MIGRATION_MAPPINGS="/opt/migration/configs/service_mappings.yml"  # mapping YAML written by create_service_mappings
readonly PRE_MIGRATION_SNAPSHOT="/opt/migration/backups/latest"            # NOTE(review): declared but unused in this file — confirm other scripts read it
# Complete service inventory with all dependencies and configurations
# Complete service inventory with all dependencies and configurations.
# Each value is a single comma-separated descriptor string:
#   config:<path>,data:<path>,secrets:<name>,networks:<zone>,dependencies:<svc>[,<svc>...]
# Special values: "none" (not applicable) and "shared" (shares another service's paths).
# NOTE(review): the dependencies list reuses ',' as its own separator
# (e.g. "dependencies:postgres,redis"), so any parser must read field 5
# through the end of the string, not exactly field 5.
declare -A SERVICE_INVENTORY=(
  # Storage and Database Services
  ["postgres"]="config:/var/lib/postgresql/data/postgresql.conf,data:/var/lib/postgresql/data,secrets:postgres_password,networks:data-zone,dependencies:none"
  ["redis"]="config:/data/redis.conf,data:/data,secrets:redis_password,networks:data-zone,dependencies:none"
  # Core Application Services
  ["traefik"]="config:/etc/traefik,data:/certificates,secrets:traefik_users,networks:public-zone,dependencies:none"
  ["immich-server"]="config:/usr/src/app/upload,data:/usr/src/app/upload,secrets:postgres_password,networks:dmz-zone,dependencies:postgres,redis"
  ["immich-microservices"]="config:shared,data:shared,secrets:postgres_password,networks:dmz-zone,dependencies:postgres,redis,immich-server"
  ["immich-machine-learning"]="config:/cache,data:/cache,secrets:none,networks:dmz-zone,dependencies:immich-server"
  ["immich-web"]="config:none,data:none,secrets:none,networks:dmz-zone,dependencies:immich-server"
  ["jellyfin"]="config:/config,data:/media,secrets:none,networks:dmz-zone,dependencies:none"
  ["homeassistant"]="config:/config,data:/config,secrets:none,networks:dmz-zone,dependencies:none"
  ["appflowy"]="config:/app/data,data:/app/data,secrets:postgres_password,networks:dmz-zone,dependencies:postgres"
  ["paperless-ngx"]="config:/usr/src/paperless/data,data:/usr/src/paperless/media,secrets:postgres_password,networks:dmz-zone,dependencies:postgres,redis"
  # Management and Monitoring Services
  ["portainer"]="config:/data,data:/data,secrets:none,networks:management-zone,dependencies:none"
  ["grafana"]="config:/etc/grafana,data:/var/lib/grafana,secrets:grafana_admin_password,networks:monitoring-zone,dependencies:none"
  ["prometheus"]="config:/etc/prometheus,data:/prometheus,secrets:none,networks:monitoring-zone,dependencies:none"
  ["alertmanager"]="config:/etc/alertmanager,data:/alertmanager,secrets:none,networks:monitoring-zone,dependencies:prometheus"
  ["cadvisor"]="config:none,data:none,secrets:none,networks:monitoring-zone,dependencies:none"
  ["node-exporter"]="config:none,data:none,secrets:none,networks:monitoring-zone,dependencies:none"
  # IoT and Automation Services
  ["esphome"]="config:/config,data:/config,secrets:none,networks:dmz-zone,dependencies:homeassistant"
  ["mqtt"]="config:/mosquitto/config,data:/mosquitto/data,secrets:none,networks:dmz-zone,dependencies:homeassistant"
  # Backup and Utility Services
  ["duplicati"]="config:/data,data:/backups,secrets:none,networks:internal-zone,dependencies:none"
  ["watchtower"]="config:none,data:none,secrets:none,networks:management-zone,dependencies:none"
)
# Service configuration transformations needed for new architecture
# Service configuration transformations needed for new architecture.
# Keys are service names (plus the special key "all" applied globally);
# values are comma-separated transformation identifiers consumed by
# validate_migration_transformations.
declare -A CONFIG_TRANSFORMATIONS=(
  ["all"]="docker_networks,security_zones,secrets_management,resource_limits,health_checks"
  ["postgres"]="replication_setup,connection_pooling,performance_tuning"
  ["redis"]="clustering,persistence_config,memory_optimization"
  ["traefik"]="network_segmentation,ssl_enhancement,middleware_updates"
  ["immich-server"]="database_connection_update,redis_connection_update,file_path_updates"
  ["jellyfin"]="gpu_passthrough,transcoding_optimization,network_updates"
  ["homeassistant"]="mqtt_integration,device_discovery,automation_updates"
  ["grafana"]="datasource_updates,dashboard_provisioning,authentication_updates"
  ["prometheus"]="target_updates,alert_rule_updates,storage_optimization"
)
# Cleanup function
# Cleanup function: drop stale validation scratch files from /tmp.
# Only files older than one hour are removed; find failures are ignored
# so cleanup never aborts the caller.
cleanup_validation() {
  log_info "Cleaning up validation temporary files..."
  local scratch_pattern
  for scratch_pattern in "validation_*.tmp" "service_*.tmp"; do
    find /tmp -name "$scratch_pattern" -mmin +60 -delete 2>/dev/null || true
  done
  log_info "Validation cleanup completed"
}
# Rollback function
# Rollback function: stop and remove any containers spawned for validation
# (named "validation_*"), then run the standard temp-file cleanup.
# All docker failures are tolerated so rollback always completes.
rollback_validation() {
  log_info "Rolling back validation processes..."
  local container_ids
  # Stop validation containers that are still running
  container_ids=$(docker ps --filter "name=validation_*" -q 2>/dev/null || true)
  if [[ -n "$container_ids" ]]; then
    echo "$container_ids" | xargs -r docker stop 2>/dev/null || true
  fi
  # Remove validation containers, running or exited
  container_ids=$(docker ps -a --filter "name=validation_*" -q 2>/dev/null || true)
  if [[ -n "$container_ids" ]]; then
    echo "$container_ids" | xargs -r docker rm 2>/dev/null || true
  fi
  cleanup_validation
  log_info "Validation rollback completed"
}
# Function to create service mapping configuration
# Function to create service mapping configuration.
# Overwrites $MIGRATION_MAPPINGS with the full migration-mapping YAML.
# The heredoc delimiter is quoted ('EOF'), so the document is written
# literally with no shell expansion.
create_service_mappings() {
  log_step "Creating comprehensive service mapping configuration..."
  mkdir -p "$(dirname "$MIGRATION_MAPPINGS")"
  cat > "$MIGRATION_MAPPINGS" << 'EOF'
# Service Migration Mappings
# Defines how services map from current to new architecture
version: "2.0"
# Global transformations applied to all services
global_transformations:
  networks:
    old_network: "default"
    new_networks:
      public: ["traefik"]
      dmz: ["immich-server", "immich-web", "immich-microservices", "immich-machine-learning",
            "jellyfin", "homeassistant", "appflowy", "paperless-ngx", "esphome", "mqtt"]
      internal: ["portainer", "duplicati", "watchtower"]
      data: ["postgres", "redis"]
      monitoring: ["grafana", "prometheus", "alertmanager", "cadvisor", "node-exporter"]
  security:
    secrets_migration:
      hardcoded_passwords: "docker_secrets"
      config_files: "docker_configs"
      certificates: "docker_secrets"
  resource_constraints:
    cpu_limits: true
    memory_limits: true
    restart_policies: true
    health_checks: true
# Service-specific mappings and transformations
services:
  postgres:
    current_location: "omv800:/var/lib/docker/volumes/postgres_data"
    new_location: "postgres-primary-data"
    transformations:
      - type: "replication_setup"
        config:
          primary_host: "omv800"
          replica_host: "fedora"
          streaming_replication: true
      - type: "connection_pooling"
        config:
          max_connections: 200
          shared_buffers: "2GB"
          effective_cache_size: "6GB"
      - type: "backup_configuration"
        config:
          wal_archiving: true
          point_in_time_recovery: true
    dependencies:
      before: []
      after: ["network_setup", "storage_setup"]
  redis:
    current_location: "omv800:/var/lib/docker/volumes/redis_data"
    new_location: "redis-primary-data"
    transformations:
      - type: "clustering_setup"
        config:
          cluster_mode: true
          replicas: 2
          sentinel: true
      - type: "persistence_config"
        config:
          save_frequency: "900 1"
          aof_enabled: true
          aof_fsync: "everysec"
    dependencies:
      before: []
      after: ["network_setup"]
  immich-server:
    current_location: "omv800:/var/lib/docker/volumes/immich_server_data"
    new_location: "immich-server-data"
    transformations:
      - type: "database_connection"
        config:
          old_host: "postgres"
          new_host: "postgres-primary"
          connection_string_update: true
      - type: "redis_connection"
        config:
          old_host: "redis"
          new_host: "redis-primary"
          session_storage_update: true
      - type: "storage_optimization"
        config:
          file_storage: "/usr/src/app/upload"
          thumbnail_storage: "/usr/src/app/upload/thumbs"
          nfs_optimization: true
    dependencies:
      before: ["postgres", "redis"]
      after: ["immich-microservices", "immich-machine-learning"]
  immich-microservices:
    current_location: "shared_with_server"
    new_location: "shared_with_server"
    transformations:
      - type: "job_queue_optimization"
        config:
          queue_backend: "redis"
          concurrent_jobs: 4
          memory_limit: "2GB"
    dependencies:
      before: ["immich-server", "redis"]
      after: []
  immich-machine-learning:
    current_location: "omv800:/var/lib/docker/volumes/immich_ml_cache"
    new_location: "immich-ml-cache"
    transformations:
      - type: "gpu_optimization"
        config:
          gpu_enabled: true
          gpu_device: "/dev/dri"
          tensorflow_optimization: true
      - type: "model_caching"
        config:
          cache_size: "10GB"
          model_download: true
    dependencies:
      before: ["immich-server"]
      after: []
  jellyfin:
    current_location: "omv800:/var/lib/docker/volumes/jellyfin_config"
    new_location: "jellyfin-config"
    transformations:
      - type: "hardware_transcoding"
        config:
          gpu_acceleration: true
          vaapi_device: "/dev/dri/renderD128"
          hardware_decoding: true
      - type: "media_optimization"
        config:
          media_path: "/media"
          transcoding_temp: "/tmp/jellyfin"
          cache_optimization: true
      - type: "network_optimization"
        config:
          local_network_subnets: ["192.168.50.0/24", "10.0.0.0/8"]
          external_access: true
    dependencies:
      before: []
      after: []
  homeassistant:
    current_location: "jonathan-2518f5u:/var/lib/docker/volumes/homeassistant_config"
    new_location: "homeassistant-config"
    transformations:
      - type: "device_discovery"
        config:
          mdns_enabled: true
          upnp_enabled: true
          network_discovery: ["192.168.50.0/24"]
      - type: "integration_updates"
        config:
          mqtt_broker: "mqtt-broker"
          database_url_update: true
          recorder_optimization: true
      - type: "automation_migration"
        config:
          yaml_validation: true
          deprecated_features: "update"
    dependencies:
      before: ["mqtt"]
      after: ["esphome"]
  traefik:
    current_location: "omv800:/var/lib/docker/volumes/traefik_data"
    new_location: "traefik-certificates"
    transformations:
      - type: "network_segmentation"
        config:
          frontend_network: "public-zone"
          backend_networks: ["dmz-zone", "internal-zone", "monitoring-zone"]
          network_isolation: true
      - type: "ssl_enhancement"
        config:
          tls_version_min: "1.2"
          cipher_suites: "secure_only"
          hsts_enabled: true
      - type: "middleware_updates"
        config:
          rate_limiting: true
          security_headers: true
          authentication: "secrets_based"
    dependencies:
      before: ["network_setup"]
      after: ["all_backend_services"]
  grafana:
    current_location: "audrey:/var/lib/docker/volumes/grafana_data"
    new_location: "grafana-data"
    transformations:
      - type: "datasource_provisioning"
        config:
          prometheus_url: "http://prometheus:9090"
          loki_url: "http://loki:3100"
          automatic_provisioning: true
      - type: "dashboard_migration"
        config:
          dashboard_provisioning: true
          plugin_updates: true
          theme_consistency: true
    dependencies:
      before: ["prometheus"]
      after: []
  prometheus:
    current_location: "audrey:/var/lib/docker/volumes/prometheus_data"
    new_location: "prometheus-data"
    transformations:
      - type: "target_discovery"
        config:
          service_discovery: "docker_swarm"
          static_targets: "update"
          scrape_interval_optimization: true
      - type: "storage_optimization"
        config:
          retention_time: "30d"
          storage_path: "/prometheus"
          wal_compression: true
    dependencies:
      before: []
      after: ["alertmanager", "grafana"]
# Migration phases and ordering
migration_phases:
  phase_1_infrastructure:
    order: 1
    services: ["network_setup", "storage_setup", "secrets_setup"]
    parallel: false
  phase_2_data_layer:
    order: 2
    services: ["postgres", "redis"]
    parallel: false
    validation_required: true
  phase_3_core_services:
    order: 3
    services: ["traefik"]
    parallel: false
    health_check_required: true
  phase_4_application_services:
    order: 4
    services: ["immich-server", "jellyfin", "homeassistant", "appflowy", "paperless-ngx"]
    parallel: true
    max_concurrent: 2
    validation_required: true
  phase_5_dependent_services:
    order: 5
    services: ["immich-microservices", "immich-machine-learning", "immich-web", "esphome", "mqtt"]
    parallel: true
    max_concurrent: 3
  phase_6_monitoring:
    order: 6
    services: ["prometheus", "alertmanager", "grafana", "cadvisor", "node-exporter"]
    parallel: true
    max_concurrent: 3
  phase_7_management:
    order: 7
    services: ["portainer", "duplicati", "watchtower"]
    parallel: true
# Validation requirements
validation_requirements:
  data_integrity:
    database_checksums: true
    file_checksums: true
    configuration_validation: true
  functionality_testing:
    health_endpoints: true
    authentication_testing: true
    integration_testing: true
    performance_baseline: true
  rollback_preparation:
    backup_verification: true
    rollback_scripts: true
    emergency_procedures: true
EOF
  log_success "Service mapping configuration created: $MIGRATION_MAPPINGS"
}
# Function to validate current service state
# Function to validate current service state.
# Iterates over SERVICE_INVENTORY, validates each service with
# validate_individual_service, and accumulates results into
# $VALIDATION_DIR/current_state_validation.json.
# Returns: 0 when every service validated cleanly, 1 otherwise.
validate_current_state() {
  log_step "Validating current service state and configurations..."
  local validation_report="$VALIDATION_DIR/current_state_validation.json"
  mkdir -p "$VALIDATION_DIR"
  # Initialize validation report skeleton (timestamp filled in below)
  cat > "$validation_report" << 'EOF'
{
  "validation_timestamp": "",
  "total_services": 0,
  "services_validated": 0,
  "validation_errors": 0,
  "services": {},
  "missing_configurations": [],
  "missing_data": [],
  "dependency_issues": []
}
EOF
  # Stamp the report with the validation start time
  jq --arg timestamp "$(date -Iseconds)" '.validation_timestamp = $timestamp' \
    "$validation_report" > "${validation_report}.tmp" && mv "${validation_report}.tmp" "$validation_report"

  local services_validated=0
  local validation_errors=0
  local service service_info validation_result service_status

  # Validate each service in the inventory
  for service in "${!SERVICE_INVENTORY[@]}"; do
    log_info "Validating service: $service"
    service_info="${SERVICE_INVENTORY[$service]}"
    # Declaration is separated from the command substitution so a helper
    # failure is not masked by 'local' always returning 0.
    validation_result=$(validate_individual_service "$service" "$service_info")

    # Merge this service's result object into the report
    jq --arg service "$service" \
      --argjson result "$validation_result" \
      '.services[$service] = $result' "$validation_report" > "${validation_report}.tmp" \
      && mv "${validation_report}.tmp" "$validation_report"

    service_status=$(echo "$validation_result" | jq -r '.status')
    if [[ "$service_status" == "valid" ]]; then
      # Plain arithmetic assignment: unlike ((var++)), this never returns a
      # non-zero status when the pre-increment value is 0, so it is safe if
      # the sourced error-handling library enables 'set -e'.
      services_validated=$((services_validated + 1))
    else
      validation_errors=$((validation_errors + 1))
      log_warn "Validation issues found for $service"
    fi
  done

  # Update final statistics
  jq --argjson total "${#SERVICE_INVENTORY[@]}" \
    --argjson validated "$services_validated" \
    --argjson errors "$validation_errors" \
    '.total_services = $total | .services_validated = $validated | .validation_errors = $errors' \
    "$validation_report" > "${validation_report}.tmp" && mv "${validation_report}.tmp" "$validation_report"

  log_success "Current state validation completed: $services_validated/${#SERVICE_INVENTORY[@]} services validated"
  if [[ $validation_errors -gt 0 ]]; then
    log_error "$validation_errors validation errors found - check report: $validation_report"
    return 1
  fi
  return 0
}
# Function to validate individual service
# Function to validate an individual service.
# Locates the service's container across the known hosts, then checks its
# configuration path, data path, container health, and declared dependencies.
# Arguments:
#   $1 - service name (key of SERVICE_INVENTORY)
#   $2 - descriptor string: config:<p>,data:<p>,secrets:<s>,networks:<n>,dependencies:<d>[,<d>...]
# Outputs: a JSON validation-result object on stdout.
validate_individual_service() {
  local service_name=$1
  local service_info=$2

  # Parse service information.  Declarations are separated from the command
  # substitutions so a failed pipeline is not masked by 'local'.
  # BUGFIX: the dependency list is itself comma-separated
  # ("dependencies:postgres,redis"), so it must be taken as field 5 through
  # the end (-f5-); plain -f5 silently dropped every dependency after the
  # first, leaving those dependencies unchecked.
  local config_path data_path secrets networks dependencies
  config_path=$(echo "$service_info" | cut -d',' -f1 | cut -d':' -f2-)
  data_path=$(echo "$service_info" | cut -d',' -f2 | cut -d':' -f2-)
  secrets=$(echo "$service_info" | cut -d',' -f3 | cut -d':' -f2-)
  networks=$(echo "$service_info" | cut -d',' -f4 | cut -d':' -f2-)
  dependencies=$(echo "$service_info" | cut -d',' -f5- | cut -d':' -f2-)

  local validation_issues=()
  local validation_status="valid"

  # Find the host where this service is currently running.
  # NOTE(review): grep -i on the name is a substring match (e.g. "redis"
  # also matches "redis-exporter"); kept for compatibility with existing
  # container naming — confirm names are unambiguous across hosts.
  local service_host=""
  local service_container=""
  local host container_check
  for host in omv800 fedora surface jonathan-2518f5u audrey; do
    container_check=$(ssh -o ConnectTimeout=10 "$host" "docker ps --format '{{.Names}}' | grep -i '$service_name'" 2>/dev/null | head -1 || echo "")
    if [[ -n "$container_check" ]]; then
      service_host="$host"
      service_container="$container_check"
      break
    fi
  done

  if [[ -z "$service_host" ]]; then
    # Core services must exist somewhere; anything else may be a new or
    # decomposed service that is only created during migration.
    case "$service_name" in
      "postgres"|"redis"|"traefik"|"immich-server"|"jellyfin"|"homeassistant"|"grafana"|"prometheus")
        validation_issues+=("Service $service_name not found on any host")
        validation_status="missing"
        ;;
      *)
        log_debug "Service $service_name not found - may be new or decomposed service"
        ;;
    esac
  else
    # Validate the configuration path, either on the host filesystem or
    # inside the running container.
    if [[ "$config_path" != "none" && "$config_path" != "shared" ]]; then
      if ! ssh "$service_host" "test -d '$config_path' || docker exec $service_container test -d '$config_path'" 2>/dev/null; then
        validation_issues+=("Configuration path not found: $config_path")
        validation_status="config_missing"
      fi
    fi
    # Validate the data path the same way
    if [[ "$data_path" != "none" && "$data_path" != "shared" ]]; then
      if ! ssh "$service_host" "test -d '$data_path' || docker exec $service_container test -d '$data_path'" 2>/dev/null; then
        validation_issues+=("Data path not found: $data_path")
        validation_status="data_missing"
      fi
    fi
    # Validate container health; containers without a healthcheck (or
    # unreachable hosts) report "unknown" and are not flagged.
    local health_status="unknown"
    if [[ -n "$service_container" ]]; then
      health_status=$(ssh "$service_host" "docker inspect --format='{{.State.Health.Status}}' $service_container" 2>/dev/null || echo "unknown")
      if [[ "$health_status" == "unhealthy" ]]; then
        validation_issues+=("Service health check failing")
        validation_status="unhealthy"
      fi
    fi
  fi

  # Validate that each declared dependency is running on some host
  if [[ "$dependencies" != "none" ]]; then
    local dep dep_found
    IFS=',' read -ra DEPS <<< "$dependencies"
    for dep in "${DEPS[@]}"; do
      dep_found=false
      for host in omv800 fedora surface jonathan-2518f5u audrey; do
        if ssh -o ConnectTimeout=10 "$host" "docker ps --format '{{.Names}}' | grep -i '$dep'" >/dev/null 2>&1; then
          dep_found=true
          break
        fi
      done
      if [[ "$dep_found" == false ]]; then
        validation_issues+=("Dependency not found: $dep")
        # Do not overwrite a more specific failure status already recorded
        if [[ "$validation_status" == "valid" ]]; then
          validation_status="dependency_missing"
        fi
      fi
    done
  fi

  # Serialize the issues list; only pipe through jq when non-empty, since
  # an empty printf would otherwise yield [""] instead of [].
  local issues_json="[]"
  if [[ ${#validation_issues[@]} -gt 0 ]]; then
    issues_json=$(printf '%s\n' "${validation_issues[@]}" | jq -R . | jq -s .)
  fi

  # Emit the result object (unquoted heredoc: variables expand here)
  cat << EOF
{
  "service": "$service_name",
  "status": "$validation_status",
  "current_host": "$service_host",
  "current_container": "$service_container",
  "config_path": "$config_path",
  "data_path": "$data_path",
  "secrets": "$secrets",
  "networks": "$networks",
  "dependencies": "$dependencies",
  "validation_issues": $issues_json,
  "validated_at": "$(date -Iseconds)"
}
EOF
}
# Function to create migration transformation scripts
# Function to create migration transformation scripts.
# Writes per-service transformation shell scripts under
# $SERVICE_CONFIGS_DIR/transformations.  Every heredoc delimiter is quoted,
# so the generated scripts and their embedded configs are written literally
# (no expansion happens in THIS script; ${DOMAIN} etc. expand later, when
# the generated artifacts are used).
# NOTE(review): the generated SQL embeds placeholder passwords
# ("..._password_here") — these must be replaced with real secrets before
# the transformation scripts are executed.
create_migration_transformations() {
  log_step "Creating service migration transformation scripts..."
  local transformations_dir="$SERVICE_CONFIGS_DIR/transformations"
  mkdir -p "$transformations_dir"
  # Create transformation for PostgreSQL clustering
  cat > "$transformations_dir/postgres_transformation.sh" << 'EOF'
#!/bin/bash
# PostgreSQL Migration Transformation
set -euo pipefail
source "$(dirname "$0")/../../../lib/error_handling.sh"
log_step "Transforming PostgreSQL configuration for clustering..."
# Create new PostgreSQL configuration with clustering
cat > /tmp/postgresql.conf << 'PG_EOF'
# PostgreSQL Configuration for Docker Swarm Clustering
listen_addresses = '*'
port = 5432
# Memory settings
shared_buffers = 2GB
effective_cache_size = 6GB
maintenance_work_mem = 512MB
work_mem = 32MB
# WAL settings for replication
wal_level = replica
archive_mode = on
archive_command = 'test ! -f /var/lib/postgresql/wal_archive/%f && cp %p /var/lib/postgresql/wal_archive/%f'
max_wal_senders = 3
wal_keep_segments = 64
# Replication settings
hot_standby = on
hot_standby_feedback = on
# Connection settings
max_connections = 200
shared_preload_libraries = 'pg_stat_statements'
# Logging
log_destination = 'stderr'
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_statement = 'mod'
log_min_duration_statement = 1000
# Performance tuning
checkpoint_completion_target = 0.9
wal_buffers = 64MB
default_statistics_target = 100
random_page_cost = 1.1
effective_io_concurrency = 200
PG_EOF
# Create replication user setup
cat > /tmp/setup_replication.sql << 'SQL_EOF'
-- Create replication user
CREATE USER replicator WITH REPLICATION ENCRYPTED PASSWORD 'replication_password_here';
-- Create monitoring user
CREATE USER pg_monitor WITH PASSWORD 'monitoring_password_here';
GRANT pg_monitor TO pg_monitor;
-- Optimize for performance
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
CREATE EXTENSION IF NOT EXISTS pg_buffercache;
SQL_EOF
log_success "PostgreSQL transformation configuration created"
EOF
  # Create transformation for Immich service updates
  cat > "$transformations_dir/immich_transformation.sh" << 'EOF'
#!/bin/bash
# Immich Migration Transformation
set -euo pipefail
source "$(dirname "$0")/../../../lib/error_handling.sh"
log_step "Transforming Immich configuration for new architecture..."
# Update database connection configuration
cat > /tmp/immich_database_config.json << 'JSON_EOF'
{
  "database": {
    "host": "postgres-primary",
    "port": 5432,
    "username": "immich",
    "database": "immich",
    "passwordFile": "/run/secrets/postgres_password"
  },
  "redis": {
    "host": "redis-primary",
    "port": 6379,
    "passwordFile": "/run/secrets/redis_password"
  },
  "storage": {
    "uploadPath": "/usr/src/app/upload",
    "thumbnailPath": "/usr/src/app/upload/thumbs",
    "profileImagePath": "/usr/src/app/upload/profile",
    "videoPath": "/usr/src/app/upload/videos"
  },
  "machinelearning": {
    "enabled": true,
    "url": "http://immich-machine-learning:3003",
    "clipModel": "ViT-B-32__openai"
  }
}
JSON_EOF
# Create optimized Docker Compose configuration for Immich
cat > /tmp/immich_compose_optimized.yml << 'COMPOSE_EOF'
version: '3.8'
services:
  immich-server:
    image: ghcr.io/immich-app/immich-server:release
    environment:
      - DB_HOSTNAME=postgres-primary
      - DB_USERNAME=immich
      - DB_PASSWORD_FILE=/run/secrets/postgres_password
      - DB_DATABASE_NAME=immich
      - REDIS_HOSTNAME=redis-primary
      - REDIS_PASSWORD_FILE=/run/secrets/redis_password
      - UPLOAD_LOCATION=/usr/src/app/upload
    volumes:
      - immich-upload-data:/usr/src/app/upload
      - /etc/localtime:/etc/localtime:ro
    secrets:
      - postgres_password
      - redis_password
    networks:
      - dmz-zone
      - data-zone
    deploy:
      replicas: 2
      placement:
        constraints:
          - node.labels.role.storage == true
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.immich.rule=Host(`immich.${DOMAIN}`)"
        - "traefik.http.routers.immich.entrypoints=websecure"
        - "traefik.http.routers.immich.tls.certresolver=letsencrypt"
        - "traefik.http.services.immich.loadbalancer.server.port=3001"
  immich-microservices:
    image: ghcr.io/immich-app/immich-server:release
    command: ['start.sh', 'microservices']
    environment:
      - DB_HOSTNAME=postgres-primary
      - DB_USERNAME=immich
      - DB_PASSWORD_FILE=/run/secrets/postgres_password
      - DB_DATABASE_NAME=immich
      - REDIS_HOSTNAME=redis-primary
      - REDIS_PASSWORD_FILE=/run/secrets/redis_password
    volumes:
      - immich-upload-data:/usr/src/app/upload
    secrets:
      - postgres_password
      - redis_password
    networks:
      - dmz-zone
      - data-zone
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.labels.role.compute == true
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'
  immich-machine-learning:
    image: ghcr.io/immich-app/immich-machine-learning:release
    environment:
      - MACHINE_LEARNING_CACHE_FOLDER=/cache
    volumes:
      - immich-model-cache:/cache
    networks:
      - dmz-zone
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.labels.role.compute == true
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
        reservations:
          memory: 2G
          cpus: '1.0'
  immich-web:
    image: ghcr.io/immich-app/immich-web:release
    environment:
      - IMMICH_SERVER_URL=http://immich-server:3001
    networks:
      - dmz-zone
    deploy:
      replicas: 2
      placement:
        preferences:
          - spread: node.labels.zone
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.25'
volumes:
  immich-upload-data:
    driver: local
    driver_opts:
      type: nfs
      o: addr=omv800,rw
      device: ":/mnt/storage/immich"
  immich-model-cache:
    driver: local
networks:
  dmz-zone:
    external: true
  data-zone:
    external: true
secrets:
  postgres_password:
    external: true
  redis_password:
    external: true
COMPOSE_EOF
log_success "Immich transformation configuration created"
EOF
  # Create transformation for Jellyfin GPU optimization
  cat > "$transformations_dir/jellyfin_transformation.sh" << 'EOF'
#!/bin/bash
# Jellyfin Migration Transformation with GPU optimization
set -euo pipefail
source "$(dirname "$0")/../../../lib/error_handling.sh"
log_step "Transforming Jellyfin configuration for GPU acceleration..."
# Create GPU-optimized Jellyfin configuration
cat > /tmp/jellyfin_compose_gpu.yml << 'COMPOSE_EOF'
version: '3.8'
services:
  jellyfin:
    image: jellyfin/jellyfin:latest
    environment:
      - JELLYFIN_PublishedServerUrl=https://jellyfin.${DOMAIN}
      - TZ=America/New_York
    volumes:
      - jellyfin-config:/config
      - jellyfin-cache:/cache
      - /mnt/media:/media:ro
      - /dev/dri:/dev/dri
    devices:
      - /dev/dri/renderD128:/dev/dri/renderD128
    networks:
      - dmz-zone
    deploy:
      replicas: 1
      placement:
        constraints:
          - node.labels.role.storage == true
          - node.labels.gpu.available == true
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
        reservations:
          memory: 2G
          cpus: '1.0'
        # GPU reservation would be here in newer Compose versions
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.jellyfin.rule=Host(`jellyfin.${DOMAIN}`)"
        - "traefik.http.routers.jellyfin.entrypoints=websecure"
        - "traefik.http.routers.jellyfin.tls.certresolver=letsencrypt"
        - "traefik.http.services.jellyfin.loadbalancer.server.port=8096"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8096/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
volumes:
  jellyfin-config:
    driver: local
    driver_opts:
      type: nfs
      o: addr=omv800,rw
      device: ":/mnt/storage/jellyfin/config"
  jellyfin-cache:
    driver: local
networks:
  dmz-zone:
    external: true
COMPOSE_EOF
# Create Jellyfin system.xml with GPU acceleration
cat > /tmp/jellyfin_system.xml << 'XML_EOF'
<?xml version="1.0" encoding="utf-8"?>
<ServerConfiguration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
  <EnableHardwareAcceleration>true</EnableHardwareAcceleration>
  <HardwareAccelerationType>vaapi</HardwareAccelerationType>
  <VaapiDevice>/dev/dri/renderD128</VaapiDevice>
  <EnableHardwareDecoding>true</EnableHardwareDecoding>
  <EnableHardwareEncoding>true</EnableHardwareEncoding>
  <AllowHevcEncoding>true</AllowHevcEncoding>
  <MaxConcurrentTranscodes>4</MaxConcurrentTranscodes>
  <TranscodingTempPath>/cache/transcoding-temp</TranscodingTempPath>
  <EnableFasterSeeking>true</EnableFasterSeeking>
  <SegmentKeepSeconds>200</SegmentKeepSeconds>
  <ProgressiveDownloadPercentage>25</ProgressiveDownloadPercentage>
</ServerConfiguration>
XML_EOF
log_success "Jellyfin GPU transformation configuration created"
EOF
  chmod +x "$transformations_dir"/*.sh
  log_success "Migration transformation scripts created in $transformations_dir"
}
# Function to validate migration transformations
# Function to validate migration transformations.
# Checks every transformation listed in CONFIG_TRANSFORMATIONS against the
# live environment (docker networks/secrets, expected config files/dirs,
# GPU devices) and writes a JSON report.  Unrecognized transformation names
# are treated as valid (no check implemented).  Always returns 0; problems
# are reported via the log and the report file.
validate_migration_transformations() {
  log_step "Validating migration transformations..."
  local transformation_report="$VALIDATION_DIR/transformation_validation.json"
  # Ensure the report directory exists even when this runs standalone
  # (the "transformations" CLI action does not call validate_current_state)
  mkdir -p "$VALIDATION_DIR"

  # Initialize transformation validation report (timestamp filled in below)
  cat > "$transformation_report" << 'EOF'
{
  "validation_timestamp": "",
  "transformations_validated": 0,
  "transformation_errors": 0,
  "transformations": {}
}
EOF
  jq --arg timestamp "$(date -Iseconds)" '.validation_timestamp = $timestamp' \
    "$transformation_report" > "${transformation_report}.tmp" && mv "${transformation_report}.tmp" "$transformation_report"

  local transformations_validated=0
  local transformation_errors=0
  local service transformations transformation service_validation

  for service in "${!CONFIG_TRANSFORMATIONS[@]}"; do
    transformations="${CONFIG_TRANSFORMATIONS[$service]}"
    log_info "Validating transformations for: $service"
    # Parse the comma-separated transformation list
    IFS=',' read -ra TRANSFORM_ARRAY <<< "$transformations"
    service_validation="{\"service\": \"$service\", \"transformations\": [], \"status\": \"valid\"}"

    for transformation in "${TRANSFORM_ARRAY[@]}"; do
      local transform_valid=true
      local transform_issues=()
      case "$transformation" in
        "docker_networks")
          # Network segmentation must already exist on this node
          if ! docker network ls | grep -q "dmz-zone"; then
            transform_valid=false
            transform_issues+=("Network segmentation not configured")
          fi
          ;;
        "security_zones")
          if [[ ! -f "$SERVICE_CONFIGS_DIR/security/zones.yml" ]]; then
            transform_valid=false
            transform_issues+=("Security zones configuration missing")
          fi
          ;;
        "secrets_management")
          if ! docker secret ls | grep -q "postgres_password"; then
            transform_valid=false
            transform_issues+=("Docker secrets not configured")
          fi
          ;;
        "resource_limits")
          if [[ ! -d "$SERVICE_CONFIGS_DIR/resources" ]]; then
            transform_valid=false
            transform_issues+=("Resource limit configurations missing")
          fi
          ;;
        "gpu_passthrough")
          if [[ ! -d "/dev/dri" ]]; then
            transform_valid=false
            transform_issues+=("GPU devices not available")
          fi
          ;;
      esac

      # BUGFIX: build the issues array explicitly.  Piping an empty
      # expansion through 'jq -R . | jq -s .' succeeds and yields [""],
      # so the old '|| echo []' fallback never fired and valid
      # transformations were reported with a bogus empty-string issue.
      local issues_json="[]"
      if [[ ${#transform_issues[@]} -gt 0 ]]; then
        issues_json=$(printf '%s\n' "${transform_issues[@]}" | jq -R . | jq -s .)
      fi
      local transform_result="{\"name\": \"$transformation\", \"valid\": $transform_valid, \"issues\": $issues_json}"
      service_validation=$(echo "$service_validation" | jq --argjson transform "$transform_result" '.transformations += [$transform]')

      if [[ "$transform_valid" == false ]]; then
        service_validation=$(echo "$service_validation" | jq '.status = "invalid"')
        # Plain assignment avoids ((x++)) returning non-zero at 0 under set -e
        transformation_errors=$((transformation_errors + 1))
      else
        transformations_validated=$((transformations_validated + 1))
      fi
    done

    # Merge this service's validation object into the report
    jq --arg service "$service" \
      --argjson validation "$service_validation" \
      '.transformations[$service] = $validation' "$transformation_report" > "${transformation_report}.tmp" \
      && mv "${transformation_report}.tmp" "$transformation_report"
  done

  # Update final statistics
  jq --argjson validated "$transformations_validated" \
    --argjson errors "$transformation_errors" \
    '.transformations_validated = $validated | .transformation_errors = $errors' \
    "$transformation_report" > "${transformation_report}.tmp" && mv "${transformation_report}.tmp" "$transformation_report"

  log_success "Transformation validation completed: $transformations_validated validated, $transformation_errors errors"
  if [[ $transformation_errors -gt 0 ]]; then
    log_warn "Transformation validation found $transformation_errors errors - check report: $transformation_report"
  fi
  return 0
}
# Function to create comprehensive migration plan
# Function to create comprehensive migration plan.
# Builds $VALIDATION_DIR/migration_execution_plan.json by starting from a
# skeleton and layering in prerequisites, phases, and validation
# checkpoints via jq.  The phase/checkpoint payloads are static JSON held
# in single-quoted shell strings (no expansion).
create_comprehensive_migration_plan() {
  log_step "Creating comprehensive migration execution plan..."
  local migration_plan="$VALIDATION_DIR/migration_execution_plan.json"
  # Create detailed migration plan skeleton (created_at filled in below)
  cat > "$migration_plan" << 'EOF'
{
  "plan_version": "2.0",
  "created_at": "",
  "estimated_duration_hours": 8,
  "rollback_points": 6,
  "phases": [],
  "prerequisites": [],
  "validation_checkpoints": [],
  "rollback_procedures": {}
}
EOF
  jq --arg timestamp "$(date -Iseconds)" '.created_at = $timestamp' "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"
  # Add prerequisites
  local prerequisites='[
    "All hosts accessible via SSH",
    "Docker Swarm cluster initialized",
    "Network segmentation configured",
    "Secrets management deployed",
    "Backup verification completed",
    "Monitoring systems operational"
  ]'
  jq --argjson prereqs "$prerequisites" '.prerequisites = $prereqs' "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"
  # Add detailed migration phases
  local phases='[
    {
      "phase": 1,
      "name": "Infrastructure Preparation",
      "duration_hours": 1,
      "services": ["network_zones", "storage_volumes", "secrets_deployment"],
      "parallel": false,
      "rollback_point": true,
      "validation_required": true
    },
    {
      "phase": 2,
      "name": "Data Layer Migration",
      "duration_hours": 2,
      "services": ["postgres", "redis"],
      "parallel": false,
      "rollback_point": true,
      "validation_required": true,
      "critical": true
    },
    {
      "phase": 3,
      "name": "Core Services Migration",
      "duration_hours": 1,
      "services": ["traefik"],
      "parallel": false,
      "rollback_point": true,
      "validation_required": true,
      "critical": true
    },
    {
      "phase": 4,
      "name": "Application Services Migration",
      "duration_hours": 3,
      "services": ["immich-server", "jellyfin", "homeassistant", "appflowy", "paperless-ngx"],
      "parallel": true,
      "max_concurrent": 2,
      "rollback_point": true,
      "validation_required": true
    },
    {
      "phase": 5,
      "name": "Supporting Services Migration",
      "duration_hours": 1,
      "services": ["immich-microservices", "immich-machine-learning", "immich-web", "esphome", "mqtt"],
      "parallel": true,
      "max_concurrent": 3,
      "rollback_point": true,
      "validation_required": true
    },
    {
      "phase": 6,
      "name": "Monitoring and Management",
      "duration_hours": 1,
      "services": ["prometheus", "alertmanager", "grafana", "portainer"],
      "parallel": true,
      "rollback_point": true,
      "validation_required": false
    }
  ]'
  jq --argjson phases "$phases" '.phases = $phases' "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"
  # Add validation checkpoints
  local checkpoints='[
    {
      "checkpoint": 1,
      "phase": "Infrastructure Preparation",
      "validations": [
        "Docker networks created",
        "Storage volumes accessible",
        "Secrets properly deployed",
        "Security zones functional"
      ]
    },
    {
      "checkpoint": 2,
      "phase": "Data Layer Migration",
      "validations": [
        "PostgreSQL replication working",
        "Redis clustering functional",
        "Database data integrity verified",
        "Connection pooling operational"
      ]
    },
    {
      "checkpoint": 3,
      "phase": "Core Services Migration",
      "validations": [
        "Traefik SSL certificates valid",
        "Load balancing functional",
        "Security middleware active",
        "Health checks passing"
      ]
    },
    {
      "checkpoint": 4,
      "phase": "Application Services Migration",
      "validations": [
        "All services responding to health checks",
        "Database connections established",
        "File storage accessible",
        "Authentication working"
      ]
    },
    {
      "checkpoint": 5,
      "phase": "Supporting Services Migration",
      "validations": [
        "Microservices communication working",
        "Queue processing functional",
        "Integration endpoints responding",
        "Real-time features operational"
      ]
    },
    {
      "checkpoint": 6,
      "phase": "Final Validation",
      "validations": [
        "All services healthy",
        "Performance metrics acceptable",
        "Security scans passed",
        "User acceptance tests completed"
      ]
    }
  ]'
  jq --argjson checkpoints "$checkpoints" '.validation_checkpoints = $checkpoints' "$migration_plan" > "${migration_plan}.tmp" && mv "${migration_plan}.tmp" "$migration_plan"
  log_success "Comprehensive migration plan created: $migration_plan"
}
# Main execution function
# Main execution function.
# Dispatches on the first CLI argument; defaults to the full "validate"
# run.  Unknown actions fall through to the help text (and exit 0).
main() {
  local action=${1:-"validate"}
  # Register cleanup and rollback functions with the error-handling library
  register_cleanup cleanup_validation
  register_rollback rollback_validation
  case $action in
    "validate")
      log_step "Starting comprehensive service validation..."
      # Validate prerequisites (required external tools)
      validate_prerequisites ssh docker jq yq
      # Create service mappings
      create_service_mappings
      create_checkpoint "service_mappings_created"
      # Validate current state (returns non-zero on validation errors)
      validate_current_state
      create_checkpoint "current_state_validated"
      # Create migration transformations
      create_migration_transformations
      create_checkpoint "transformations_created"
      # Validate transformations
      validate_migration_transformations
      create_checkpoint "transformations_validated"
      # Create comprehensive migration plan
      create_comprehensive_migration_plan
      create_checkpoint "migration_plan_created"
      log_success "✅ Service migration validation completed!"
      log_info "📋 Service mappings: $MIGRATION_MAPPINGS"
      log_info "📊 Validation results: $VALIDATION_DIR/"
      log_info "🗺️ Migration plan: $VALIDATION_DIR/migration_execution_plan.json"
      ;;
    "current-state")
      validate_current_state
      ;;
    "transformations")
      create_migration_transformations
      validate_migration_transformations
      ;;
    "plan")
      create_comprehensive_migration_plan
      ;;
    "help"|*)
      # Unquoted heredoc so $0 expands to the invoked script name
      cat << EOF
Service Migration Validator
Usage: $0 <action>
Actions:
validate - Complete validation and planning (default)
current-state - Only validate current service state
transformations - Only create and validate transformations
plan - Only create migration execution plan
help - Show this help
Examples:
$0 validate
$0 current-state
$0 transformations
EOF
      ;;
  esac
}
# Execute main function
main "$@"