#!/bin/bash
#
# Setup Docker Swarm Cluster
#
# Initializes Docker Swarm across all hosts: verifies Docker on every host,
# initializes the swarm on the manager, joins the workers, creates overlay
# networks and writes configuration/summary files.
#
# Requires non-interactive SSH access from this machine to every host below.

set -euo pipefail

echo "🐳 Setting up Docker Swarm cluster..."

# Define hosts and their roles.
MANAGER_HOST="omv800"
MANAGER_IP="192.168.50.229"
# WORKER_HOSTS and WORKER_IPS are parallel arrays: entry i of one belongs to
# entry i of the other.
WORKER_HOSTS=("fedora" "surface" "jonathan-2518f5u" "audrey")
WORKER_IPS=("192.168.50.225" "192.168.50.254" "192.168.50.181" "192.168.50.145")

# Colors for output (ANSI escape sequences, interpreted by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

#######################################
# Print an informational message with a green [INFO] prefix.
# Arguments: $1 - message text
#######################################
print_status() {
    echo -e "${GREEN}[INFO]${NC} $1"
}

#######################################
# Print a warning message with a yellow [WARNING] prefix.
# Arguments: $1 - message text
#######################################
print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1"
}

#######################################
# Print an error message with a red [ERROR] prefix.
# Arguments: $1 - message text
#######################################
print_error() {
    echo -e "${RED}[ERROR]${NC} $1"
}

#######################################
# Check whether the docker CLI can be run on a remote host.
# Arguments: $1 - SSH host name
# Returns:   0 if `docker --version` succeeds over SSH, 1 otherwise
#            (an unreachable host is also reported as "not installed").
#######################################
check_docker() {
    local host=$1
    print_status "Checking Docker installation on $host..."

    if ssh -o ConnectTimeout=10 "$host" "docker --version" > /dev/null 2>&1; then
        print_status "Docker is installed on $host"
        return 0
    else
        print_error "Docker is not installed on $host"
        return 1
    fi
}

#######################################
# Check whether a remote host is currently a swarm member.
# Arguments: $1 - SSH host name
# Returns:   0 if the host reports a swarm-member state, 1 otherwise
#            (including when the host cannot be queried).
#######################################
check_swarm_status() {
    local host=$1

    # Match the whole line (-x): a plain substring search for "active" also
    # matches "inactive", which would flag every host as a swarm member.
    # "locked", "pending" and "error" are also treated as membership so that
    # such a node is made to leave before it is (re)initialized or joined.
    if ssh -o ConnectTimeout=10 "$host" \
        "docker info --format '{{.Swarm.LocalNodeState}}'" 2>/dev/null \
        | grep -Eqx 'active|locked|pending|error'; then
        print_warning "$host is already part of a swarm"
        return 0
    else
        print_status "$host is not in swarm mode"
        return 1
    fi
}

#######################################
# Force a remote host to leave whatever swarm it is part of.
# Arguments: $1 - SSH host name
# Returns:   always 0; failures of the leave command are deliberately
#            ignored (best effort), then the function waits 5 seconds.
#######################################
leave_swarm() {
    local host=$1
    print_status "Leaving existing swarm on $host..."
    ssh -o ConnectTimeout=10 "$host" "docker swarm leave --force" 2>/dev/null || true
    sleep 5
}

# The worker arrays are indexed in lockstep further down; fail early with a
# clear message instead of an unbound-index error half way through the setup.
if (( ${#WORKER_HOSTS[@]} != ${#WORKER_IPS[@]} )); then
    print_error "WORKER_HOSTS and WORKER_IPS must have the same number of entries"
    exit 1
fi

# 1. Check Docker installation on all hosts
print_status "Step 1: Checking Docker installation..."
for host in "$MANAGER_HOST" "${WORKER_HOSTS[@]}"; do
    if ! check_docker "$host"; then
        print_error "Please install Docker on $host before proceeding"
        exit 1
    fi
done

# 2. Initialize swarm on manager
print_status "Step 2: Initializing swarm on manager ($MANAGER_HOST)..."
# If the manager is still part of an old swarm, leave it first so that
# `docker swarm init` starts from a clean state.
if check_swarm_status "$MANAGER_HOST"; then
    leave_swarm "$MANAGER_HOST"
fi

ssh "$MANAGER_HOST" "docker swarm init --advertise-addr $MANAGER_IP --listen-addr $MANAGER_IP"

# Get join token for workers
print_status "Getting join token for workers..."
JOIN_TOKEN=$(ssh "$MANAGER_HOST" "docker swarm join-token -q worker")
MANAGER_TOKEN=$(ssh "$MANAGER_HOST" "docker swarm join-token -q manager")

# An empty token would only surface later as a confusing join failure.
if [[ -z "$JOIN_TOKEN" || -z "$MANAGER_TOKEN" ]]; then
    print_error "Failed to retrieve swarm join tokens from $MANAGER_HOST"
    exit 1
fi

# The tokens are secrets (the manager token grants control of the cluster),
# so they are intentionally not echoed to the terminal or captured logs.
print_status "Worker and manager join tokens retrieved"

# 3. Join workers to swarm
print_status "Step 3: Joining workers to swarm..."
for i in "${!WORKER_HOSTS[@]}"; do
    host="${WORKER_HOSTS[$i]}"
    ip="${WORKER_IPS[$i]}"

    print_status "Joining $host ($ip) to swarm..."

    # A worker left over from a previous cluster must leave before it can join.
    if check_swarm_status "$host"; then
        leave_swarm "$host"
    fi

    if ssh -o ConnectTimeout=10 "$host" "docker swarm join --token $JOIN_TOKEN $MANAGER_IP:2377 --advertise-addr $ip --listen-addr $ip"; then
        print_status "Successfully joined $host to swarm"
    else
        print_error "Failed to join $host to swarm"
        exit 1
    fi
done

# 4. Verify swarm status
print_status "Step 4: Verifying swarm status..."
sleep 10 # give the freshly joined nodes time to show up in the node list

print_status "Swarm nodes:"
ssh "$MANAGER_HOST" "docker node ls"

# 5. Create overlay networks
print_status "Step 5: Creating overlay networks..."

NETWORKS=(
    "traefik-public"
    "monitoring"
    "databases"
    "applications"
    "iot-network"
    "backup-network"
)

for network in "${NETWORKS[@]}"; do
    print_status "Creating network: $network"
    # Check for existence explicitly instead of hiding every error behind
    # "may already exist"; real creation failures stay visible on stderr.
    # Network creation remains best effort: a failure only produces a warning.
    if ssh "$MANAGER_HOST" "docker network inspect $network" > /dev/null 2>&1; then
        print_warning "Network $network already exists"
    elif ssh "$MANAGER_HOST" "docker network create --driver overlay --attachable $network"; then
        print_status "Created network: $network"
    else
        print_warning "Failed to create network: $network"
    fi
done

# 6. Setup swarm manager backup
print_status "Step 6: Setting up manager backup..."
print_status "Promoting surface as backup manager..."
# Host that becomes the second (backup) manager, and the address it advertises.
BACKUP_MANAGER_HOST="surface"
BACKUP_MANAGER_IP="192.168.50.254"
BACKUP_MANAGER_STATUS="Not promoted"

# The backup host is listed in WORKER_HOSTS and therefore has already joined
# the swarm as a worker in step 3. A second `docker swarm join` on a member
# node is rejected, so an existing member is promoted from the manager
# instead. The node ID is empty when the host is not part of a swarm; only in
# that case is it joined directly with the manager token.
# NOTE: two managers do not tolerate the loss of either one (Raft needs a
# majority); an odd number of managers is recommended for real failover.
backup_node_id=$(ssh -o ConnectTimeout=10 "$BACKUP_MANAGER_HOST" \
    "docker info --format '{{.Swarm.NodeID}}'" 2>/dev/null) || backup_node_id=""

backup_promoted=false
if [[ -n "$backup_node_id" ]]; then
    if ssh "$MANAGER_HOST" "docker node promote $backup_node_id"; then
        backup_promoted=true
    fi
elif ssh "$BACKUP_MANAGER_HOST" "docker swarm join --token $MANAGER_TOKEN $MANAGER_IP:2377 --advertise-addr $BACKUP_MANAGER_IP --listen-addr $BACKUP_MANAGER_IP"; then
    backup_promoted=true
fi

if [[ "$backup_promoted" == true ]]; then
    BACKUP_MANAGER_STATUS="Active"
    print_status "Successfully promoted $BACKUP_MANAGER_HOST as backup manager"
else
    print_warning "Failed to promote $BACKUP_MANAGER_HOST as backup manager"
fi

# 7. Configure swarm settings
print_status "Step 7: Configuring swarm settings..."

# Set up auto-lock for security.
# This prints the unlock key, which is needed to unlock a manager after its
# Docker daemon restarts - store it somewhere safe.
ssh "$MANAGER_HOST" "docker swarm update --autolock=true"

# Configure logging.
# `docker swarm update` has no --log-driver/--log-opt flags, so calling it
# with them aborts the whole script under `set -e`. Log settings have to be
# applied per daemon (daemon.json) or per service instead.
print_warning "Log driver settings (json-file, max-size=10m, max-file=3) cannot be set swarm-wide; configure them in /etc/docker/daemon.json on each node or per service"

# 8. Create swarm configuration file
print_status "Step 8: Creating swarm configuration..."

mkdir -p "/opt/migration/configs"

# The file contains the join tokens, so create it readable by the owner only.
previous_umask=$(umask)
umask 077
cat > "/opt/migration/configs/swarm-config.yml" << EOF
# Docker Swarm Configuration
# Generated: $(date)

swarm:
  manager:
    primary: $MANAGER_HOST
    backup: $BACKUP_MANAGER_HOST
    ip: $MANAGER_IP

  workers:
$(for i in "${!WORKER_HOSTS[@]}"; do
    echo "    - host: ${WORKER_HOSTS[$i]}"
    echo "      ip: ${WORKER_IPS[$i]}"
done)

  networks:
$(for network in "${NETWORKS[@]}"; do
    echo "    - $network"
done)

  tokens:
    worker: $JOIN_TOKEN
    manager: $MANAGER_TOKEN

  settings:
    autolock: true
    # Intended log settings; not applied swarm-wide (set per daemon or service).
    log_driver: json-file
    log_opts:
      max_size: 10m
      max_file: 3
EOF
umask "$previous_umask"
# The umask only affects newly created files; tighten a pre-existing one too.
chmod 600 "/opt/migration/configs/swarm-config.yml"

# 9. Test swarm connectivity
print_status "Step 9: Testing swarm connectivity..."

# Test service deployment
print_status "Testing service deployment..."
ssh "$MANAGER_HOST" "docker service create --name test-service --replicas 2 --network traefik-public nginx:alpine"

sleep 10 # let the replicas get scheduled before inspecting them

# Check service status
print_status "Service status:"
ssh "$MANAGER_HOST" "docker service ls"
ssh "$MANAGER_HOST" "docker service ps test-service"

# Clean up test service
print_status "Cleaning up test service..."
ssh "$MANAGER_HOST" "docker service rm test-service"

# 10. Create health check script
print_status "Step 10: Creating health check script..."

mkdir -p "/opt/migration/scripts"

# Quoted delimiter: the generated script is written literally, nothing in it
# is expanded by this script.
cat > "/opt/migration/scripts/check_swarm_health.sh" << 'EOF'
#!/bin/bash
# Check Docker Swarm Health

set -euo pipefail

MANAGER_HOST="omv800"

echo "🏥 Checking Docker Swarm health..."

# Check node status
echo "📋 Node status:"
ssh "$MANAGER_HOST" "docker node ls"

# Check network status
echo "🌐 Network status:"
ssh "$MANAGER_HOST" "docker network ls --filter driver=overlay"

# Check service status
echo "🔧 Service status:"
ssh "$MANAGER_HOST" "docker service ls"

# Check swarm info
echo "ℹ️ Swarm info:"
ssh "$MANAGER_HOST" "docker info --format '{{.Swarm.LocalNodeState}}'"

echo "✅ Swarm health check completed"
EOF

chmod +x "/opt/migration/scripts/check_swarm_health.sh"

# 11. Final verification
print_status "Step 11: Final verification..."

print_status "Swarm nodes:"
ssh "$MANAGER_HOST" "docker node ls"

print_status "Overlay networks:"
ssh "$MANAGER_HOST" "docker network ls --filter driver=overlay"

print_status "Swarm info:"
ssh "$MANAGER_HOST" "docker info --format '{{.Swarm.LocalNodeState}}'"

# 12. Create summary
print_status "Step 12: Creating setup summary..."

mkdir -p "/opt/migration"

cat > "/opt/migration/setup_summary.txt" << EOF
Docker Swarm Setup Summary
Generated: $(date)

Manager Node:
  Host: $MANAGER_HOST
  IP: $MANAGER_IP
  Status: Active

Backup Manager:
  Host: $BACKUP_MANAGER_HOST
  IP: $BACKUP_MANAGER_IP
  Status: $BACKUP_MANAGER_STATUS

Worker Nodes:
$(for i in "${!WORKER_HOSTS[@]}"; do
    echo "  - ${WORKER_HOSTS[$i]}: ${WORKER_IPS[$i]}"
done)

Networks Created:
$(for network in "${NETWORKS[@]}"; do
    echo "  - $network"
done)

Configuration Files:
  - /opt/migration/configs/swarm-config.yml
  - /opt/migration/scripts/check_swarm_health.sh

Next Steps:
  1. Deploy Traefik reverse proxy
  2. Setup monitoring stack
  3. Begin service migration
EOF

print_status "✅ Docker Swarm setup completed successfully!"
print_status "📋 Setup summary saved to: /opt/migration/setup_summary.txt"
print_status "🔧 Health check script: /opt/migration/scripts/check_swarm_health.sh"

echo ""
print_status "Next steps:"
echo "  1. Deploy Traefik: ./scripts/deploy_traefik.sh"
echo "  2. Setup monitoring: ./scripts/setup_monitoring.sh"
echo "  3. Begin migration: ./scripts/start_migration.sh"