#!/bin/bash
# Dynamic Resource Scaling Automation
# Automatically scales Docker Swarm services based on resource utilization
# metrics (CPU / memory averages across a service's running replicas), with
# additional time-of-day scaling and an auditable CSV event log.
#
# Usage: dynamic-resource-scaling.sh [--evaluate|--monitor|--setup|--status|--help]
set -euo pipefail

# --- Configuration -----------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
LOG_FILE="$PROJECT_ROOT/logs/resource-scaling-$(date +%Y%m%d-%H%M%S).log"
EVENTS_CSV="$PROJECT_ROOT/logs/scaling-events.csv"

# Scaling thresholds (percent)
readonly CPU_HIGH_THRESHOLD=80
readonly CPU_LOW_THRESHOLD=20
readonly MEMORY_HIGH_THRESHOLD=85
readonly MEMORY_LOW_THRESHOLD=30

# Scaling limits (replicas per service)
readonly MAX_REPLICAS=5
readonly MIN_REPLICAS=1

# Services to manage (add more as needed)
SCALABLE_SERVICES=(
  "nextcloud_nextcloud"
  "immich_immich_server"
  "paperless_paperless"
  "jellyfin_jellyfin"
  "grafana_grafana"
)

# Create the logs directory (also holds EVENTS_CSV and reports).
mkdir -p "$(dirname "$LOG_FILE")"

# Log a timestamped message to stdout and LOG_FILE.
log() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"
}

# True iff a swarm service matching this name is listed.
# NOTE: `docker service ls` exits 0 even when the filter matches nothing,
# so existence must be tested on the OUTPUT, not the exit status.
service_exists() {
  local service_name="$1"
  [[ -n "$(docker service ls --filter "name=$service_name" --format '{{.Name}}' 2>/dev/null)" ]]
}

# Append one scaling event to the audit CSV.
# Arguments: $1 action, $2 service, $3 old replicas, $4 new replicas, $5 trigger
record_event() {
  echo "$(date -Iseconds),$1,$2,$3,$4,$5" >> "$EVENTS_CSV"
}

# Print "avg_cpu avg_memory replica_count" for a service.
# Averages docker stats across all running replicas; prints "0 0 0" when the
# service has no running tasks or no parseable stats.
get_service_metrics() {
  local service_name="$1"

  # IDs of the service's running tasks (empty if service missing/stopped).
  local tasks
  tasks=$(docker service ps "$service_name" --filter "desired-state=running" \
    --format "{{.ID}}" 2>/dev/null || echo "")

  if [[ -z "$tasks" ]]; then
    echo "0 0 0" # cpu_percent memory_percent replica_count
    return
  fi

  local total_cpu=0
  local total_memory=0
  local container_count=0
  local task_id container_id stats cpu_percent mem_percent

  while IFS= read -r task_id; do
    [[ -n "$task_id" ]] || continue

    # Swarm embeds the task ID in the container name, so a name filter on the
    # task ID resolves the task to its local container (empty if not here).
    container_id=$(docker ps -q -f "name=$task_id" 2>/dev/null || echo "")
    stats=$(docker stats --no-stream --format "{{.CPUPerc}},{{.MemPerc}}" \
      "$container_id" 2>/dev/null || echo "0.00%,0.00%")

    cpu_percent=${stats%%,*}
    cpu_percent=${cpu_percent%\%}
    mem_percent=${stats##*,}
    mem_percent=${mem_percent%\%}

    # Only accumulate well-formed numeric samples.
    if [[ "$cpu_percent" =~ ^[0-9]+\.?[0-9]*$ ]] && [[ "$mem_percent" =~ ^[0-9]+\.?[0-9]*$ ]]; then
      total_cpu=$(echo "$total_cpu + $cpu_percent" | bc -l)
      total_memory=$(echo "$total_memory + $mem_percent" | bc -l)
      # NOT ((container_count++)): that returns status 1 on the first
      # increment (pre-increment value 0) and aborts under `set -e`.
      container_count=$((container_count + 1))
    fi
  done <<< "$tasks"

  if [[ $container_count -gt 0 ]]; then
    local avg_cpu avg_memory
    avg_cpu=$(echo "scale=2; $total_cpu / $container_count" | bc -l)
    avg_memory=$(echo "scale=2; $total_memory / $container_count" | bc -l)
    echo "$avg_cpu $avg_memory $container_count"
  else
    echo "0 0 0"
  fi
}

# Print the current (desired) replica count of a service, e.g. the "2" of "2/2".
get_replica_count() {
  local service_name="$1"
  docker service ls --filter "name=$service_name" --format "{{.Replicas}}" | cut -d'/' -f1
}

# Scale a service up by one replica, bounded by MAX_REPLICAS.
scale_up() {
  local service_name="$1"
  local current_replicas="$2"
  local new_replicas=$((current_replicas + 1))

  if [[ $new_replicas -le $MAX_REPLICAS ]]; then
    log "🔼 Scaling UP $service_name: $current_replicas → $new_replicas replicas"
    docker service update --replicas "$new_replicas" "$service_name" >/dev/null 2>&1 || {
      log "❌ Failed to scale up $service_name"
      return 1
    }
    log "✅ Successfully scaled up $service_name"
    record_event "scale_up" "$service_name" "$current_replicas" "$new_replicas" "auto"
  else
    log "⚠️ $service_name already at maximum replicas ($MAX_REPLICAS)"
  fi
}

# Scale a service down by one replica, bounded by MIN_REPLICAS.
scale_down() {
  local service_name="$1"
  local current_replicas="$2"
  local new_replicas=$((current_replicas - 1))

  if [[ $new_replicas -ge $MIN_REPLICAS ]]; then
    log "🔽 Scaling DOWN $service_name: $current_replicas → $new_replicas replicas"
    docker service update --replicas "$new_replicas" "$service_name" >/dev/null 2>&1 || {
      log "❌ Failed to scale down $service_name"
      return 1
    }
    log "✅ Successfully scaled down $service_name"
    record_event "scale_down" "$service_name" "$current_replicas" "$new_replicas" "auto"
  else
    log "⚠️ $service_name already at minimum replicas ($MIN_REPLICAS)"
  fi
}

# Compare a service's metrics against the thresholds and scale if needed.
# Arguments: $1 service, $2 cpu %, $3 memory %, $4 current replica count
evaluate_scaling() {
  local service_name="$1"
  local cpu_percent="$2"
  local memory_percent="$3"
  local current_replicas="$4"

  # Truncate to integers for the threshold comparisons; default empty/partial
  # values to 0 so [[ -gt ]] never sees an empty operand.
  local cpu_int="${cpu_percent%%.*}"
  local memory_int="${memory_percent%%.*}"
  cpu_int="${cpu_int:-0}"
  memory_int="${memory_int:-0}"

  # Scale up conditions
  if [[ $cpu_int -gt $CPU_HIGH_THRESHOLD ]] || [[ $memory_int -gt $MEMORY_HIGH_THRESHOLD ]]; then
    log "📊 $service_name metrics: CPU=${cpu_percent}%, Memory=${memory_percent}% - HIGH usage detected"
    scale_up "$service_name" "$current_replicas"
    return
  fi

  # Scale down conditions (only if we have more than minimum replicas)
  if [[ $current_replicas -gt $MIN_REPLICAS ]] && [[ $cpu_int -lt $CPU_LOW_THRESHOLD ]] && [[ $memory_int -lt $MEMORY_LOW_THRESHOLD ]]; then
    log "📊 $service_name metrics: CPU=${cpu_percent}%, Memory=${memory_percent}% - LOW usage detected"
    scale_down "$service_name" "$current_replicas"
    return
  fi

  # No scaling needed
  log "📊 $service_name metrics: CPU=${cpu_percent}%, Memory=${memory_percent}%, Replicas=$current_replicas - OK"
}

# Time-based scaling: shrink non-critical services at night, restore at 7 AM.
time_based_scaling() {
  # Force base-10: `date +%H` zero-pads ("08", "09"), which [[ -ge ]] would
  # otherwise reject as invalid octal constants.
  local current_hour
  current_hour=$((10#$(date +%H)))

  # Night hours (2 AM - 6 AM): scale down non-critical services
  if [[ $current_hour -ge 2 && $current_hour -le 6 ]]; then
    local night_services=("paperless_paperless" "grafana_grafana")
    local service current_replicas
    for service in "${night_services[@]}"; do
      current_replicas=$(get_replica_count "$service")
      if [[ $current_replicas -gt 1 ]]; then
        log "🌙 Night scaling: reducing $service to 1 replica (was $current_replicas)"
        docker service update --replicas 1 "$service" >/dev/null 2>&1 || true
        record_event "night_scale_down" "$service" "$current_replicas" "1" "time_based"
      fi
    done
  fi

  # Morning hours (7 AM): scale back up
  if [[ $current_hour -eq 7 ]]; then
    local morning_services=("paperless_paperless" "grafana_grafana")
    local service current_replicas
    for service in "${morning_services[@]}"; do
      current_replicas=$(get_replica_count "$service")
      if [[ $current_replicas -lt 2 ]]; then
        log "🌅 Morning scaling: restoring $service to 2 replicas (was $current_replicas)"
        docker service update --replicas 2 "$service" >/dev/null 2>&1 || true
        record_event "morning_scale_up" "$service" "$current_replicas" "2" "time_based"
      fi
    done
  fi
}

# Write a YAML snapshot of all services' metrics, today's event count, and the
# configured thresholds/limits to logs/scaling-report-YYYYMMDD.yaml.
generate_scaling_report() {
  log "Generating scaling report..."

  local report_file="$PROJECT_ROOT/logs/scaling-report-$(date +%Y%m%d).yaml"

  cat > "$report_file" << EOF
scaling_report:
  timestamp: "$(date -Iseconds)"
  evaluation_cycle: $(date +%Y%m%d-%H%M%S)
  current_state:
EOF

  # Add current state of all services
  local service metrics cpu_percent memory_percent replica_count
  for service in "${SCALABLE_SERVICES[@]}"; do
    metrics=$(get_service_metrics "$service")
    read -r cpu_percent memory_percent replica_count <<< "$metrics"
    cat >> "$report_file" << EOF
    - service: "$service"
      replicas: $replica_count
      cpu_usage: "${cpu_percent}%"
      memory_usage: "${memory_percent}%"
      status: $(if service_exists "$service"; then echo "running"; else echo "not_found"; fi)
EOF
  done

  # Add scaling events from today
  local events_today
  events_today=$(grep -c "$(date +%Y-%m-%d)" "$EVENTS_CSV" 2>/dev/null || echo "0")
  cat >> "$report_file" << EOF
  daily_summary:
    scaling_events_today: $events_today
  thresholds:
    cpu_high: ${CPU_HIGH_THRESHOLD}%
    cpu_low: ${CPU_LOW_THRESHOLD}%
    memory_high: ${MEMORY_HIGH_THRESHOLD}%
    memory_low: ${MEMORY_LOW_THRESHOLD}%
  limits:
    max_replicas: $MAX_REPLICAS
    min_replicas: $MIN_REPLICAS
EOF

  log "✅ Scaling report generated: $report_file"
}

# Generate a systemd unit (in /tmp, to be installed manually) and a standalone
# monitoring loop script under PROJECT_ROOT/scripts.
setup_monitoring() {
  log "Setting up dynamic scaling monitoring..."

  # Create systemd service for continuous monitoring
  cat > /tmp/docker-autoscaler.service << 'EOF'
[Unit]
Description=Docker Swarm Auto Scaler
After=docker.service
Requires=docker.service

[Service]
Type=simple
ExecStart=/home/jonathan/Coding/HomeAudit/scripts/dynamic-resource-scaling.sh --monitor
Restart=always
RestartSec=60
User=root

[Install]
WantedBy=multi-user.target
EOF

  # Create monitoring loop script
  cat > "$PROJECT_ROOT/scripts/scaling-monitor-loop.sh" << 'EOF'
#!/bin/bash
# Continuous monitoring loop for dynamic scaling
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

while true; do
  # Run scaling evaluation
  ./dynamic-resource-scaling.sh --evaluate
  # Wait 5 minutes between evaluations
  sleep 300
done
EOF
  chmod +x "$PROJECT_ROOT/scripts/scaling-monitor-loop.sh"

  log "✅ Monitoring scripts created"
  log "⚠️ To enable: sudo cp /tmp/docker-autoscaler.service /etc/systemd/system/ && sudo systemctl enable --now docker-autoscaler"
}

# Entry point: dispatch on the first CLI argument (default: --evaluate).
main() {
  case "${1:-evaluate}" in
    "--evaluate")
      log "🔍 Starting dynamic scaling evaluation..."

      # Initialize CSV file if it doesn't exist
      if [[ ! -f "$EVENTS_CSV" ]]; then
        echo "timestamp,action,service,old_replicas,new_replicas,trigger" > "$EVENTS_CSV"
      fi

      # Check each scalable service
      local service metrics cpu_percent memory_percent current_replicas
      for service in "${SCALABLE_SERVICES[@]}"; do
        if service_exists "$service"; then
          metrics=$(get_service_metrics "$service")
          read -r cpu_percent memory_percent current_replicas <<< "$metrics"
          evaluate_scaling "$service" "$cpu_percent" "$memory_percent" "$current_replicas"
        else
          log "⚠️ Service not found: $service"
        fi
      done

      # Apply time-based scaling
      time_based_scaling

      # Generate report
      generate_scaling_report
      ;;
    "--monitor")
      log "🔄 Starting continuous monitoring mode..."
      # Re-invoke via $0 so the loop works regardless of the caller's CWD.
      while true; do
        "$0" --evaluate
        sleep 300 # 5-minute intervals
      done
      ;;
    "--setup")
      setup_monitoring
      ;;
    "--status")
      log "📊 Current service status:"
      local service metrics cpu_percent memory_percent current_replicas
      for service in "${SCALABLE_SERVICES[@]}"; do
        if service_exists "$service"; then
          metrics=$(get_service_metrics "$service")
          read -r cpu_percent memory_percent current_replicas <<< "$metrics"
          log "  $service: ${current_replicas} replicas, CPU=${cpu_percent}%, Memory=${memory_percent}%"
        else
          log "  $service: not found"
        fi
      done
      ;;
    "--help"|"-h")
      cat << 'EOF'
Dynamic Resource Scaling Automation

USAGE:
    dynamic-resource-scaling.sh [OPTIONS]

OPTIONS:
    --evaluate    Run single scaling evaluation (default)
    --monitor     Start continuous monitoring mode
    --setup       Set up systemd service for continuous monitoring
    --status      Show current status of all scalable services
    --help, -h    Show this help message

EXAMPLES:
    # Single evaluation
    ./dynamic-resource-scaling.sh --evaluate

    # Check current status
    ./dynamic-resource-scaling.sh --status

    # Set up continuous monitoring
    ./dynamic-resource-scaling.sh --setup

CONFIGURATION:
    Edit the script to modify:
    - CPU_HIGH_THRESHOLD: Scale up when CPU > 80%
    - CPU_LOW_THRESHOLD: Scale down when CPU < 20%
    - MEMORY_HIGH_THRESHOLD: Scale up when Memory > 85%
    - MEMORY_LOW_THRESHOLD: Scale down when Memory < 30%
    - MAX_REPLICAS: Maximum replicas per service (5)
    - MIN_REPLICAS: Minimum replicas per service (1)

NOTES:
    - Requires Docker Swarm mode
    - Monitors CPU and memory usage
    - Includes time-based scaling for night hours
    - Logs all scaling events for audit
    - Safe scaling with min/max limits
EOF
      ;;
    *)
      log "❌ Unknown option: $1"
      log "Use --help for usage information"
      exit 1
      ;;
  esac
}

# Check dependencies: bc is required for the floating-point metric math.
if ! command -v bc >/dev/null 2>&1; then
  log "Installing bc for calculations..."
  sudo apt-get update && sudo apt-get install -y bc || {
    log "❌ Failed to install bc. Please install manually."
    exit 1
  }
fi

# Execute main function
main "$@"