#!/bin/bash
# Docker Swarm Optimizer
# Configures Docker Swarm with proper resource constraints, high availability, and anti-affinity rules

# Import error handling library
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "$SCRIPT_DIR/lib/error_handling.sh"

# Configuration
readonly HOSTS=("omv800" "fedora" "surface" "jonathan-2518f5u" "audrey" "raspberrypi")
readonly HOST_IPS=("192.168.50.229" "192.168.50.225" "192.168.50.254" "192.168.50.181" "192.168.50.145" "192.168.50.107")
readonly MANAGER_HOST="omv800"
readonly BACKUP_MANAGER="surface"
readonly SWARM_CONFIG_DIR="/opt/migration/configs/swarm"
readonly DOCKER_COMPOSE_DIR="/opt/migration/configs/services"

# Host capabilities and roles
declare -A HOST_ROLES=(
    ["omv800"]="primary-manager,storage,database"
    ["surface"]="backup-manager,compute,development"
    ["fedora"]="compute,automation"
    ["jonathan-2518f5u"]="iot,edge"
    ["audrey"]="monitoring,logging"
    ["raspberrypi"]="backup,storage"
)

# Resource specifications per host (memory in GB, CPU in cores)
declare -A HOST_RESOURCES=(
    ["omv800"]="memory:31,cpu:4,storage:high"
    ["surface"]="memory:8,cpu:4,storage:medium"
    ["fedora"]="memory:15,cpu:4,storage:medium"
    ["jonathan-2518f5u"]="memory:8,cpu:4,storage:low"
    ["audrey"]="memory:4,cpu:2,storage:low"
    ["raspberrypi"]="memory:8,cpu:4,storage:high"
)

# Service resource requirements and constraints
declare -A SERVICE_CONFIGS=(
    ["traefik"]="memory:512m,cpu:0.5,replicas:2,placement:manager"
    ["immich-web"]="memory:2g,cpu:1.0,replicas:2,placement:storage"
    ["immich-ml"]="memory:4g,cpu:2.0,replicas:1,placement:compute"
    ["jellyfin"]="memory:4g,cpu:2.0,replicas:1,placement:storage"
    ["homeassistant"]="memory:1g,cpu:0.5,replicas:2,placement:iot"
    ["appflowy"]="memory:1g,cpu:0.5,replicas:2,placement:development"
    ["paperless"]="memory:2g,cpu:1.0,replicas:2,placement:any"
    ["postgres"]="memory:4g,cpu:2.0,replicas:1,placement:database"
    ["redis"]="memory:512m,cpu:0.25,replicas:3,placement:database"
    ["prometheus"]="memory:2g,cpu:1.0,replicas:1,placement:monitoring"
    ["grafana"]="memory:1g,cpu:0.5,replicas:2,placement:monitoring"
    ["portainer"]="memory:512m,cpu:0.25,replicas:1,placement:manager"
)

# Cleanup function
cleanup_swarm_config() {
    log_info "Cleaning up Docker Swarm configuration..."

    # Clean up temporary files
    rm -f /tmp/swarm_*.tmp 2>/dev/null || true
    rm -f /tmp/docker_*.tmp 2>/dev/null || true

    log_info "Swarm configuration cleanup completed"
}

# Rollback function
rollback_swarm_config() {
    log_info "Rolling back Docker Swarm configuration..."

    # Stop any services that were deployed during configuration
    local services
    services=$(ssh "$MANAGER_HOST" "docker service ls -q" 2>/dev/null || echo "")
    if [[ -n "$services" ]]; then
        log_info "Stopping services for rollback..."
        ssh "$MANAGER_HOST" "docker service ls -q | xargs -r docker service rm" 2>/dev/null || true
    fi

    cleanup_swarm_config
    log_info "Swarm rollback completed"
}
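# Illustrative helper (an assumption about intended use: nothing in this script
# consumes SERVICE_CONFIGS yet). Entries are comma-separated key:value pairs,
# so a deploy step could translate them into `docker service create` flags
# roughly like this; the placement-to-constraint mapping is a sketch, not a
# confirmed convention.
build_service_flags() {
    local config="$1" flags="" field key value
    IFS=',' read -ra FIELDS <<< "$config"
    for field in "${FIELDS[@]}"; do
        key="${field%%:*}"
        value="${field#*:}"
        case "$key" in
            memory)   flags+=" --limit-memory $value" ;;
            cpu)      flags+=" --limit-cpu $value" ;;
            replicas) flags+=" --replicas $value" ;;
            placement)
                if [[ "$value" != "any" ]]; then
                    flags+=" --constraint node.labels.role.$value==true"
                fi
                ;;
        esac
    done
    echo "$flags"
}
# Example:
#   build_service_flags "${SERVICE_CONFIGS[redis]}"
#   -> --limit-memory 512m --limit-cpu 0.25 --replicas 3 --constraint node.labels.role.database==true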
# Function to validate Docker versions across hosts
validate_docker_versions() {
    log_step "Validating Docker versions across hosts..."

    local version_issues=0
    local reference_version=""

    for i in "${!HOSTS[@]}"; do
        local host="${HOSTS[$i]}"
        log_info "Checking Docker version on $host..."

        local docker_version
        docker_version=$(ssh -o ConnectTimeout=10 "$host" "docker version --format '{{.Server.Version}}'" 2>/dev/null || echo "ERROR")

        if [[ "$docker_version" == "ERROR" ]]; then
            log_error "Cannot get Docker version from $host"
            version_issues=$((version_issues + 1))
            continue
        fi

        log_info "Docker version on $host: $docker_version"

        # Set reference version from the first reachable host
        if [[ -z "$reference_version" ]]; then
            reference_version="$docker_version"
        else
            # Check version compatibility (allow minor version differences)
            local ref_major current_major
            ref_major=$(echo "$reference_version" | cut -d. -f1)
            current_major=$(echo "$docker_version" | cut -d. -f1)
            if [[ "$ref_major" != "$current_major" ]]; then
                log_warn "Docker major version mismatch: $host has $docker_version, reference is $reference_version"
                version_issues=$((version_issues + 1))
            fi
        fi
    done

    if [[ $version_issues -eq 0 ]]; then
        log_success "All Docker versions are compatible"
        return 0
    else
        log_error "$version_issues hosts have Docker version issues"
        return 1
    fi
}

# Function to configure node labels for proper service placement
configure_node_labels() {
    log_step "Configuring Docker Swarm node labels..."

    for i in "${!HOSTS[@]}"; do
        local host="${HOSTS[$i]}"
        local roles="${HOST_ROLES[$host]}"
        local resources="${HOST_RESOURCES[$host]}"

        log_info "Configuring labels for $host: $roles"

        # Parse roles and apply labels
        IFS=',' read -ra ROLE_ARRAY <<< "$roles"
        for role in "${ROLE_ARRAY[@]}"; do
            if ssh "$MANAGER_HOST" "docker node update --label-add role.$role=true $host"; then
                log_debug "Applied label role.$role=true to $host"
            else
                log_error "Failed to apply label role.$role=true to $host"
                return 1
            fi
        done

        # Parse and apply resource labels
        IFS=',' read -ra RESOURCE_ARRAY <<< "$resources"
        for resource in "${RESOURCE_ARRAY[@]}"; do
            local key value
            key=$(echo "$resource" | cut -d: -f1)
            value=$(echo "$resource" | cut -d: -f2)
            if ssh "$MANAGER_HOST" "docker node update --label-add $key=$value $host"; then
                log_debug "Applied resource label $key=$value to $host"
            else
                log_warn "Failed to apply resource label $key=$value to $host"
            fi
        done

        # Apply availability zone labels for anti-affinity (distribute across 3 zones)
        local zone="zone$(((i % 3) + 1))"
        if ssh "$MANAGER_HOST" "docker node update --label-add zone=$zone $host"; then
            log_debug "Applied zone label $zone to $host"
        else
            log_warn "Failed to apply zone label to $host"
        fi
    done

    log_success "Node labels configured successfully"
}
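# Quick verification sketch: read the labels back per node. The --format
# template is standard docker CLI; only this loop is illustrative.
verify_node_labels() {
    local host
    for host in "${HOSTS[@]}"; do
        echo "--- $host ---"
        ssh "$MANAGER_HOST" "docker node inspect $host --format '{{json .Spec.Labels}}'"
    done
}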
# Function to configure Docker daemon settings
configure_docker_daemon() {
    log_step "Configuring Docker daemon settings..."

    # Create optimized Docker daemon configuration.
    # Note: live-restore stays disabled because it is incompatible with
    # swarm mode and the daemon refuses to join/resume a swarm with it on.
    local daemon_config
    daemon_config=$(cat << 'EOF'
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
  },
  "storage-driver": "overlay2",
  "live-restore": false,
  "userland-proxy": false,
  "experimental": false,
  "metrics-addr": "127.0.0.1:9323",
  "default-ulimits": {
    "nofile": {
      "Name": "nofile",
      "Hard": 64000,
      "Soft": 64000
    }
  },
  "max-concurrent-downloads": 3,
  "max-concurrent-uploads": 5,
  "default-shm-size": "64M"
}
EOF
    )

    # Apply configuration to all hosts
    for host in "${HOSTS[@]}"; do
        log_info "Configuring Docker daemon on $host..."

        # Backup existing configuration
        ssh "$host" "sudo cp /etc/docker/daemon.json /etc/docker/daemon.json.backup 2>/dev/null || true"

        # Apply new configuration
        echo "$daemon_config" | ssh "$host" "sudo tee /etc/docker/daemon.json > /dev/null"

        # Restart Docker daemon
        if ssh "$host" "sudo systemctl restart docker"; then
            log_success "Docker daemon configured on $host"
        else
            log_error "Failed to restart Docker daemon on $host"
            return 1
        fi

        # Wait for Docker to be ready
        wait_for_service "Docker-$host" "ssh $host docker info >/dev/null 2>&1" 30 5
    done

    log_success "Docker daemon configuration completed"
}

# Function to configure swarm settings for high availability
configure_swarm_settings() {
    log_step "Configuring Docker Swarm for high availability..."

    # Configure swarm with optimized settings
    local swarm_config_updates=(
        "--autolock=true"
        "--cert-expiry=2160h0m0s"   # 90 days
        "--dispatcher-heartbeat=5s"
        "--task-history-limit=5"
    )

    for config in "${swarm_config_updates[@]}"; do
        if ssh "$MANAGER_HOST" "docker swarm update $config"; then
            log_success "Applied swarm config: $config"
        else
            log_warn "Failed to apply swarm config: $config"
        fi
    done

    # Ensure the backup manager holds a manager role; promotion happens
    # in place, so the node never has to leave and rejoin the swarm
    if ssh "$MANAGER_HOST" "docker node ls" | grep -q "$BACKUP_MANAGER.*Leader\|$BACKUP_MANAGER.*Reachable"; then
        log_success "Backup manager $BACKUP_MANAGER is already promoted"
    else
        log_info "Promoting $BACKUP_MANAGER to manager role..."
        if ssh "$MANAGER_HOST" "docker node promote $BACKUP_MANAGER"; then
            log_success "Successfully promoted $BACKUP_MANAGER to manager"
        else
            log_error "Failed to promote $BACKUP_MANAGER to manager"
            return 1
        fi
    fi

    log_success "Swarm high availability configuration completed"
}
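# With --autolock=true every docker restart on a manager leaves the swarm
# locked until `docker swarm unlock` is run. A sketch for stashing the current
# unlock key (the destination path is an assumption; keep the key off the
# swarm hosts in a real setup):
backup_swarm_unlock_key() {
    mkdir -p "$SWARM_CONFIG_DIR"
    ssh "$MANAGER_HOST" "docker swarm unlock-key -q" > "$SWARM_CONFIG_DIR/unlock.key"
    chmod 600 "$SWARM_CONFIG_DIR/unlock.key"
    log_info "Swarm unlock key saved to $SWARM_CONFIG_DIR/unlock.key"
}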
# Function to create optimized service configurations
create_optimized_service_configs() {
    log_step "Creating optimized service configurations..."

    mkdir -p "$DOCKER_COMPOSE_DIR"

    # Create Traefik configuration with proper resource constraints
    cat > "$DOCKER_COMPOSE_DIR/traefik-optimized.yml" << 'EOF'
version: '3.8'

services:
  traefik:
    image: traefik:v3.0
    command:
      # API and dashboard
      - --api.dashboard=true
      - --api.insecure=false
      # Swarm provider (Traefik v3 replaced the Docker provider's swarmMode
      # flag with a dedicated swarm provider)
      - --providers.swarm.endpoint=unix:///var/run/docker.sock
      - --providers.swarm.exposedbydefault=false
      - --providers.swarm.network=public-zone
      # Entry points
      - --entrypoints.web.address=:80
      - --entrypoints.websecure.address=:443
      - --entrypoints.web.http.redirections.entrypoint.to=websecure
      - --entrypoints.web.http.redirections.entrypoint.scheme=https
      # SSL/TLS configuration
      - --certificatesresolvers.letsencrypt.acme.email=${EMAIL}
      - --certificatesresolvers.letsencrypt.acme.storage=/certificates/acme.json
      - --certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web
      # Logging and monitoring
      - --log.level=INFO
      - --log.format=json
      - --accesslog=true
      - --accesslog.format=json
      - --metrics.prometheus=true
      - --ping=true
    ports:
      - target: 80
        published: 80
        protocol: tcp
        mode: ingress
      - target: 443
        published: 443
        protocol: tcp
        mode: ingress
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - traefik-certificates:/certificates
      - traefik-logs:/var/log/traefik
    secrets:
      - traefik_users
    networks:
      - public-zone
      - management-zone
    environment:
      - DOMAIN=${DOMAIN}
      - EMAIL=${EMAIL}
    deploy:
      mode: replicated
      replicas: 2
      placement:
        constraints:
          - node.role == manager
        preferences:
          - spread: node.labels.zone
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.25'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
        window: 120s
      update_config:
        parallelism: 1
        delay: 10s
        order: start-first
        failure_action: rollback
        monitor: 60s
      rollback_config:
        parallelism: 1
        delay: 5s
        order: stop-first
        monitor: 60s
      labels:
        - "traefik.enable=true"
        - "traefik.http.routers.traefik-dashboard.rule=Host(`traefik.${DOMAIN}`)"
        - "traefik.http.routers.traefik-dashboard.entrypoints=websecure"
        - "traefik.http.routers.traefik-dashboard.tls.certresolver=letsencrypt"
        - "traefik.http.routers.traefik-dashboard.service=api@internal"
        - "traefik.http.routers.traefik-dashboard.middlewares=auth-secure@file"

secrets:
  traefik_users:
    external: true

volumes:
  traefik-certificates:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/traefik/certificates
  traefik-logs:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/traefik/logs

networks:
  public-zone:
    external: true
  management-zone:
    external: true
EOF
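    # The stack above expects an external traefik_users secret for dashboard
    # auth. One way to seed it (htpasswd ships with apache2-utils; the
    # credentials here are placeholders):
    #   htpasswd -nb admin 'change-me' | ssh "$MANAGER_HOST" "docker secret create traefik_users -"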
    # Create PostgreSQL cluster configuration
    cat > "$DOCKER_COMPOSE_DIR/postgres-cluster.yml" << 'EOF'
version: '3.8'

services:
  postgres-primary:
    image: postgres:15-alpine
    environment:
      POSTGRES_DB: ${POSTGRES_DB}
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD_FILE: /run/secrets/postgres_password
      # NOTE: the stock postgres image does not act on the two variables
      # below; they are intended for a custom init script (not shown here)
      # that creates the replication role
      POSTGRES_REPLICATION_USER: replicator
      POSTGRES_REPLICATION_PASSWORD_FILE: /run/secrets/postgres_replication_password
    secrets:
      - postgres_password
      - postgres_replication_password
    volumes:
      - postgres-primary-data:/var/lib/postgresql/data
      - postgres-config:/etc/postgresql
    networks:
      - data-zone
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.labels.role.database == true
          - node.labels.storage == high
      resources:
        limits:
          memory: 4G
          cpus: '2.0'
        reservations:
          memory: 2G
          cpus: '1.0'
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
      update_config:
        parallelism: 1
        delay: 30s
        order: stop-first
        failure_action: rollback
        monitor: 120s
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  postgres-replica:
    image: postgres:15-alpine
    environment:
      POSTGRES_DB: ${POSTGRES_DB}
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD_FILE: /run/secrets/postgres_password
      PGUSER: ${POSTGRES_USER}
      POSTGRES_PRIMARY_HOST: postgres-primary
    secrets:
      - postgres_password
    volumes:
      - postgres-replica-data:/var/lib/postgresql/data
    networks:
      - data-zone
    # depends_on is ignored by `docker stack deploy`; startup ordering relies
    # on the replica retrying until the primary is reachable
    depends_on:
      - postgres-primary
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.labels.role.database == true
          - node.labels.storage != low
        preferences:
          - spread: node.labels.zone
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.5'
      restart_policy:
        condition: on-failure
        delay: 10s
        max_attempts: 3
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"]
      interval: 30s
      timeout: 10s
      retries: 3

secrets:
  postgres_password:
    external: true
  postgres_replication_password:
    external: true

volumes:
  postgres-primary-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/postgresql/primary/data
  postgres-replica-data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/postgresql/replica/data
  postgres-config:
    driver: local

networks:
  data-zone:
    external: true
EOF

    # Create Redis cluster configuration
    cat > "$DOCKER_COMPOSE_DIR/redis-cluster.yml" << 'EOF'
version: '3.8'

services:
  redis-primary:
    image: redis:7-alpine
    # redis-server has no --requirepass-file option, so the secret is expanded
    # at container start; $$ keeps compose interpolation from eating the $
    command: sh -c 'exec redis-server --appendonly yes --requirepass "$$(cat /run/secrets/redis_password)"'
    secrets:
      - redis_password
    volumes:
      - redis-primary-data:/data
    networks:
      - data-zone
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - node.labels.role.database == true
        preferences:
          - spread: node.labels.zone
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.25'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    healthcheck:
      test: ["CMD-SHELL", "redis-cli -a \"$$(cat /run/secrets/redis_password)\" ping | grep -q PONG"]
      interval: 30s
      timeout: 10s
      retries: 3

  redis-replica:
    image: redis:7-alpine
    # masterauth is required for the replica to sync against the
    # password-protected primary
    command: sh -c 'exec redis-server --appendonly yes --requirepass "$$(cat /run/secrets/redis_password)" --masterauth "$$(cat /run/secrets/redis_password)" --replicaof redis-primary 6379'
    secrets:
      - redis_password
    volumes:
      - redis-replica-data:/data
    networks:
      - data-zone
    # depends_on is ignored by `docker stack deploy`; replicas retry until the
    # primary resolves
    depends_on:
      - redis-primary
    deploy:
      mode: replicated
      replicas: 2
      placement:
        constraints:
          - node.labels.role.database == true
        preferences:
          - spread: node.labels.zone
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.1'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3

secrets:
  redis_password:
    external: true

volumes:
  redis-primary-data:
    driver: local
  redis-replica-data:
    driver: local

networks:
  data-zone:
    external: true
EOF

    log_success "Optimized service configurations created"
}
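# All three stacks reference external overlay networks and secrets. An
# illustrative one-time bootstrap on the manager (names match the compose
# files above; the generated values are placeholders):
#   ssh "$MANAGER_HOST" "docker network create --driver overlay --attachable public-zone"
#   ssh "$MANAGER_HOST" "docker network create --driver overlay --attachable management-zone"
#   ssh "$MANAGER_HOST" "docker network create --driver overlay --attachable data-zone"
#   ssh "$MANAGER_HOST" "docker network create --driver overlay --attachable monitoring-zone"
#   openssl rand -base64 32 | ssh "$MANAGER_HOST" "docker secret create postgres_password -"
#   openssl rand -base64 32 | ssh "$MANAGER_HOST" "docker secret create postgres_replication_password -"
#   openssl rand -base64 32 | ssh "$MANAGER_HOST" "docker secret create redis_password -"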
# Function to deploy resource monitoring
deploy_resource_monitoring() {
    log_step "Deploying resource monitoring..."

    # Create resource monitoring configuration
    cat > "$DOCKER_COMPOSE_DIR/resource-monitoring.yml" << 'EOF'
version: '3.8'

services:
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    ports:
      - target: 8080
        published: 8080
        protocol: tcp
        mode: host
    networks:
      - monitoring-zone
    deploy:
      mode: global
      resources:
        limits:
          memory: 256M
          cpus: '0.2'
        reservations:
          memory: 128M
          cpus: '0.1'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    command:
      - '--housekeeping_interval=10s'
      - '--docker_only=true'
      - '--disable_metrics=disk,network,tcp,udp,percpu,sched,process'

  node-exporter:
    image: prom/node-exporter:latest
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    ports:
      - target: 9100
        published: 9100
        protocol: tcp
        mode: host
    networks:
      - monitoring-zone
    deploy:
      mode: global
      resources:
        limits:
          memory: 128M
          cpus: '0.1'
        reservations:
          memory: 64M
          cpus: '0.05'
      restart_policy:
        condition: on-failure
        delay: 5s
        max_attempts: 3
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)'

networks:
  monitoring-zone:
    external: true
EOF

    # Deploy resource monitoring
    if ssh "$MANAGER_HOST" "cd $DOCKER_COMPOSE_DIR && docker stack deploy -c resource-monitoring.yml monitoring"; then
        log_success "Resource monitoring deployed successfully"
    else
        log_error "Failed to deploy resource monitoring"
        return 1
    fi

    # Wait for at least one running cadvisor task (a global service shows
    # replicas as e.g. "6/6", so match any non-zero running count)
    wait_for_service "Resource monitoring" "ssh $MANAGER_HOST 'docker service ls | grep monitoring_cadvisor | grep -q \"[1-9]/\"'" 60 10

    log_success "Resource monitoring deployment completed"
}
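# Illustrative spot check: both exporters publish in host mode, so every node
# should answer on its own ports once the monitoring stack settles.
check_exporters() {
    local host
    for host in "${HOSTS[@]}"; do
        curl -sf "http://${host}:8080/metrics" >/dev/null && echo "$host: cadvisor OK" || echo "$host: cadvisor unreachable"
        curl -sf "http://${host}:9100/metrics" >/dev/null && echo "$host: node-exporter OK" || echo "$host: node-exporter unreachable"
    done
}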
# Function to test swarm functionality
test_swarm_functionality() {
    log_step "Testing Docker Swarm functionality..."

    # Test service deployment
    log_info "Testing service deployment..."
    local test_service="test-swarm-function"
    if ssh "$MANAGER_HOST" "docker service create --name $test_service --replicas 3 --constraint 'node.role!=manager' alpine sleep 300"; then
        log_success "Test service deployed successfully"
    else
        log_error "Failed to deploy test service"
        return 1
    fi

    # Wait for service to be ready
    sleep 15

    # Check service status (count tasks whose current state is Running)
    local running_replicas
    running_replicas=$(ssh "$MANAGER_HOST" "docker service ps $test_service --format '{{.CurrentState}}' | grep -c Running" || true)
    if [[ $running_replicas -ge 2 ]]; then
        log_success "Test service has $running_replicas running replicas"
    else
        log_error "Test service only has $running_replicas running replicas"
    fi

    # Test service scaling
    log_info "Testing service scaling..."
    if ssh "$MANAGER_HOST" "docker service scale ${test_service}=5"; then
        sleep 10
        local scaled_replicas
        scaled_replicas=$(ssh "$MANAGER_HOST" "docker service ps $test_service --format '{{.CurrentState}}' | grep -c Running" || true)
        log_success "Service scaled to $scaled_replicas replicas"
    else
        log_warn "Service scaling test failed"
    fi

    # Test rolling update (--force restarts tasks even though the image tag is unchanged)
    log_info "Testing rolling update..."
    if ssh "$MANAGER_HOST" "docker service update --force --image alpine:latest $test_service"; then
        log_success "Rolling update test completed"
    else
        log_warn "Rolling update test failed"
    fi

    # Cleanup test service
    ssh "$MANAGER_HOST" "docker service rm $test_service" >/dev/null 2>&1 || true

    # Test network connectivity between nodes
    log_info "Testing network connectivity..."
    local connectivity_issues=0
    for host in "${HOSTS[@]}"; do
        if [[ "$host" != "$MANAGER_HOST" ]] && [[ "$host" != "raspberrypi" ]]; then
            if ping -c 1 -W 5 "$host" >/dev/null 2>&1; then
                log_debug "Network connectivity to $host: OK"
            else
                log_error "Network connectivity to $host: FAILED"
                connectivity_issues=$((connectivity_issues + 1))
            fi
        fi
    done

    if [[ $connectivity_issues -eq 0 ]]; then
        log_success "All network connectivity tests passed"
    else
        log_error "$connectivity_issues network connectivity issues detected"
        return 1
    fi

    log_success "Docker Swarm functionality tests completed successfully"
}

# Function to create swarm health monitoring script
create_swarm_health_monitor() {
    log_step "Creating swarm health monitoring script..."

    cat > "/opt/migration/scripts/swarm_health_monitor.sh" << 'EOF'
#!/bin/bash
# Docker Swarm Health Monitor
# Monitors swarm health and sends alerts for issues

MANAGER_HOST="omv800"
ALERT_LOG="/var/log/swarm_health.log"
ALERT_THRESHOLD_CPU=80
ALERT_THRESHOLD_MEMORY=85

log_alert() {
    echo "$(date): SWARM_ALERT - $1" | tee -a "$ALERT_LOG"
    logger "SWARM_HEALTH_ALERT: $1"
}

check_node_health() {
    local nodes_down
    nodes_down=$(ssh "$MANAGER_HOST" "docker node ls --format '{{.Status}}'" | grep -c Down || true)
    if [[ $nodes_down -gt 0 ]]; then
        log_alert "Docker nodes down: $nodes_down"
    fi

    local nodes_unavailable
    nodes_unavailable=$(ssh "$MANAGER_HOST" "docker node ls --format '{{.Availability}}'" | grep -c Drain || true)
    if [[ $nodes_unavailable -gt 1 ]]; then  # Allow one for maintenance
        log_alert "Multiple nodes unavailable: $nodes_unavailable"
    fi
}

check_service_health() {
    local services_with_issues
    services_with_issues=$(ssh "$MANAGER_HOST" "docker service ls --format '{{.Name}} {{.Replicas}}'" | grep -c "0/\|1/[2-9]" || true)
    if [[ $services_with_issues -gt 0 ]]; then
        log_alert "Services with replica issues: $services_with_issues"
    fi
}

check_resource_usage() {
    # Check if resource monitoring is available
    for host in omv800 fedora surface jonathan-2518f5u audrey; do
        local cpu_usage memory_usage
        cpu_usage=$(curl -s "http://${host}:8080/api/v1.3/machine" 2>/dev/null | jq -r '.cpu_usage_rate // 0' 2>/dev/null || echo "0")
        memory_usage=$(curl -s "http://${host}:8080/api/v1.3/machine" 2>/dev/null | jq -r '.memory.usage // 0' 2>/dev/null || echo "0")

        if (( $(echo "$cpu_usage > $ALERT_THRESHOLD_CPU" | bc -l 2>/dev/null || echo "0") )); then
            log_alert "High CPU usage on $host: ${cpu_usage}%"
        fi
        # Memory usage calculation would need more complex logic;
        # this is simplified for demonstration
    done
}

check_swarm_secrets() {
    local secrets_count
    secrets_count=$(ssh "$MANAGER_HOST" "docker secret ls -q | wc -l")
    if [[ $secrets_count -lt 5 ]]; then  # Expecting at least 5 secrets
        log_alert "Unexpectedly low secret count: $secrets_count"
    fi
}

# Main monitoring loop
while true; do
    check_node_health
    check_service_health
    check_resource_usage
    check_swarm_secrets
    sleep 300  # Check every 5 minutes
done
EOF

    chmod +x "/opt/migration/scripts/swarm_health_monitor.sh"

    # Deploy health monitor as a systemd service on the manager
    ssh "$MANAGER_HOST" "cat > /tmp/swarm-health-monitor.service << 'SERVICE_EOF'
[Unit]
Description=Docker Swarm Health Monitor
After=docker.service
Requires=docker.service

[Service]
ExecStart=/opt/migration/scripts/swarm_health_monitor.sh
Restart=always
RestartSec=10
User=root

[Install]
WantedBy=multi-user.target
SERVICE_EOF"

    scp "/opt/migration/scripts/swarm_health_monitor.sh" "$MANAGER_HOST:/opt/migration/scripts/"
    ssh "$MANAGER_HOST" "sudo mv /tmp/swarm-health-monitor.service /etc/systemd/system/"
    ssh "$MANAGER_HOST" "sudo systemctl daemon-reload && sudo systemctl enable swarm-health-monitor.service"

    if ssh "$MANAGER_HOST" "sudo systemctl start swarm-health-monitor.service"; then
        log_success "Swarm health monitor started on $MANAGER_HOST"
    else
        log_warn "Swarm health monitor may have issues"
    fi

    log_success "Swarm health monitoring setup completed"
}
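# A manager-quorum probe would complement the monitor above: with only two
# managers (omv800 and surface), losing either one stalls the raft quorum.
# Illustrative sketch, not wired into the systemd unit:
check_manager_quorum() {
    local managers reachable
    managers=$(ssh "$MANAGER_HOST" "docker node ls --filter role=manager -q" | wc -l)
    reachable=$(ssh "$MANAGER_HOST" "docker node ls --filter role=manager --format '{{.ManagerStatus}}'" | grep -cE 'Leader|Reachable' || true)
    echo "Manager quorum: $reachable/$managers reachable"
}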
# Main execution function
main() {
    local action=${1:-"full"}

    # Register cleanup and rollback functions
    register_cleanup cleanup_swarm_config
    register_rollback rollback_swarm_config

    case $action in
        "full")
            log_step "Starting Docker Swarm optimization..."

            # Validate prerequisites
            validate_prerequisites ssh docker jq bc curl

            # Validate network connectivity
            validate_network_connectivity "${HOST_IPS[@]}"

            # Create checkpoint
            create_checkpoint "swarm_optimization_start"

            # Validate Docker versions
            validate_docker_versions
            create_checkpoint "docker_versions_validated"

            # Configure Docker daemon
            configure_docker_daemon
            create_checkpoint "docker_daemon_configured"

            # Configure node labels
            configure_node_labels
            create_checkpoint "node_labels_configured"

            # Configure swarm settings
            configure_swarm_settings
            create_checkpoint "swarm_settings_configured"

            # Create optimized service configurations
            create_optimized_service_configs
            create_checkpoint "service_configs_created"

            # Deploy resource monitoring
            deploy_resource_monitoring
            create_checkpoint "resource_monitoring_deployed"

            # Test swarm functionality
            test_swarm_functionality
            create_checkpoint "swarm_functionality_tested"

            # Create health monitoring
            create_swarm_health_monitor
            create_checkpoint "health_monitoring_setup"

            log_success "✅ Docker Swarm optimization completed successfully!"
            log_info "📊 Check swarm status: ssh $MANAGER_HOST docker node ls"
            log_info "🔍 Monitor resources: http://any-host:8080 (cAdvisor)"
            ;;
        "labels-only")
            configure_node_labels
            ;;
        "test-only")
            test_swarm_functionality
            ;;
        "monitor-only")
            deploy_resource_monitoring
            create_swarm_health_monitor
            ;;
        "help"|*)
            cat << EOF
Docker Swarm Optimizer

Usage: $0 [action]

Actions:
    full         - Complete swarm optimization (default)
    labels-only  - Only configure node labels
    test-only    - Only test swarm functionality
    monitor-only - Only deploy monitoring
    help         - Show this help

Examples:
    $0 full
    $0 test-only
    $0 monitor-only
EOF
            ;;
    esac
}

# Execute main function
main "$@"