Files
HomeAudit/scripts/dynamic-resource-scaling.sh
admin 9ea31368f5 Complete Traefik infrastructure deployment - 60% complete
Major accomplishments:
- ✅ SELinux policy installed and working
- ✅ Core Traefik v2.10 deployment running
- ✅ Production configuration ready (v3.1)
- ✅ Monitoring stack configured
- ✅ Comprehensive documentation created
- ✅ Security hardening implemented

Current status:
- 🟡 Partially deployed (60% complete)
- ⚠️ Docker socket access needs resolution
- ❌ Monitoring stack not deployed yet
- ⚠️ Production migration pending

Next steps:
1. Fix Docker socket permissions
2. Deploy monitoring stack
3. Migrate to production config
4. Validate full functionality

Files added:
- Complete Traefik deployment documentation
- Production and test configurations
- Monitoring stack configurations
- SELinux policy module
- Security checklists and guides
- Current status documentation
2025-08-28 15:22:41 -04:00

414 lines
14 KiB
Bash
Executable File

#!/bin/bash
# Dynamic Resource Scaling Automation
# Automatically scales Docker Swarm services up/down based on CPU/memory
# utilization metrics, with additional time-of-day scaling for
# non-critical services.
set -euo pipefail

# --- Configuration ---------------------------------------------------------
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "$SCRIPT_DIR")"
LOG_FILE="$PROJECT_ROOT/logs/resource-scaling-$(date +%Y%m%d-%H%M%S).log"
readonly SCRIPT_DIR PROJECT_ROOT LOG_FILE

# Scaling thresholds (percent): scale up above HIGH, down below LOW.
readonly CPU_HIGH_THRESHOLD=80
readonly CPU_LOW_THRESHOLD=20
readonly MEMORY_HIGH_THRESHOLD=85
readonly MEMORY_LOW_THRESHOLD=30

# Replica bounds enforced by scale_up/scale_down.
readonly MAX_REPLICAS=5
readonly MIN_REPLICAS=1

# Swarm services eligible for automatic scaling (add more as needed).
SCALABLE_SERVICES=(
  "nextcloud_nextcloud"
  "immich_immich_server"
  "paperless_paperless"
  "jellyfin_jellyfin"
  "grafana_grafana"
)

# Ensure the log directory exists. Note: dirname of LOG_FILE already is
# "$PROJECT_ROOT/logs", so a single mkdir target suffices.
mkdir -p "$(dirname "$LOG_FILE")"
# Append a timestamped message to stdout and to the run's log file.
# Arguments: $* - message text
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$*" | tee -a "$LOG_FILE"
}
# Report average CPU%, memory%, and replica count for a Swarm service.
# Arguments: $1 - service name
# Outputs:   "avg_cpu avg_mem replica_count" on stdout ("0 0 0" when the
#            service has no running tasks or docker is unavailable)
get_service_metrics() {
  local service_name="$1"

  # Task IDs of the service's running replicas.
  local containers
  containers=$(docker service ps "$service_name" --filter "desired-state=running" --format "{{.ID}}" 2>/dev/null || echo "")
  if [[ -z "$containers" ]]; then
    echo "0 0 0" # cpu_percent memory_percent replica_count
    return
  fi

  # Accumulate metrics across all replicas, then average.
  local total_cpu=0
  local total_memory=0
  local container_count=0
  while IFS= read -r container_id; do
    if [[ -n "$container_id" ]]; then
      # Resolve the task ID to its container and take one stats sample.
      local stats
      stats=$(docker stats --no-stream --format "{{.CPUPerc}},{{.MemPerc}}" "$(docker ps -q -f "name=$container_id")" 2>/dev/null || echo "0.00%,0.00%")
      local cpu_percent mem_percent
      cpu_percent=$(echo "$stats" | cut -d',' -f1 | sed 's/%//')
      mem_percent=$(echo "$stats" | cut -d',' -f2 | sed 's/%//')
      # Only fold in values that parse as plain decimal numbers.
      if [[ "$cpu_percent" =~ ^[0-9]+\.?[0-9]*$ ]] && [[ "$mem_percent" =~ ^[0-9]+\.?[0-9]*$ ]]; then
        total_cpu=$(echo "$total_cpu + $cpu_percent" | bc -l)
        total_memory=$(echo "$total_memory + $mem_percent" | bc -l)
        # BUGFIX: ((container_count++)) evaluates to 0 on the first pass
        # and thus returns status 1, which aborts the script under
        # 'set -e'. Plain arithmetic assignment always succeeds.
        container_count=$((container_count + 1))
      fi
    fi
  done <<< "$containers"

  if [[ $container_count -gt 0 ]]; then
    local avg_cpu avg_memory
    avg_cpu=$(echo "scale=2; $total_cpu / $container_count" | bc -l)
    avg_memory=$(echo "scale=2; $total_memory / $container_count" | bc -l)
    echo "$avg_cpu $avg_memory $container_count"
  else
    echo "0 0 0"
  fi
}
# Print the running replica count for a Swarm service.
# Arguments: $1 - service name
# Outputs:   the numerator of docker's "N/M" replica column
get_replica_count() {
  local svc="$1"
  local replicas
  replicas=$(docker service ls --filter "name=$svc" --format "{{.Replicas}}")
  # "3/5" -> "3": strip everything from the first slash onward.
  printf '%s\n' "${replicas%%/*}"
}
# Add one replica to a service, respecting MAX_REPLICAS.
# Arguments: $1 - service name; $2 - current replica count
# Returns:   1 if the docker update fails; 0 otherwise
scale_up() {
  local service_name="$1"
  local current_replicas="$2"
  local new_replicas=$((current_replicas + 1))

  if [[ $new_replicas -le $MAX_REPLICAS ]]; then
    # BUGFIX: the transition separator was missing from the message
    # ("23" instead of "2 -> 3"); make the replica change explicit.
    log "🔼 Scaling UP $service_name: $current_replicas -> $new_replicas replicas"
    docker service update --replicas "$new_replicas" "$service_name" >/dev/null 2>&1 || {
      log "❌ Failed to scale up $service_name"
      return 1
    }
    log "✅ Successfully scaled up $service_name"
    # Record the scaling event for audit (CSV header written by main).
    echo "$(date -Iseconds),scale_up,$service_name,$current_replicas,$new_replicas,auto" >> "$PROJECT_ROOT/logs/scaling-events.csv"
  else
    log "⚠️ $service_name already at maximum replicas ($MAX_REPLICAS)"
  fi
}
# Remove one replica from a service, respecting MIN_REPLICAS.
# Arguments: $1 - service name; $2 - current replica count
# Returns:   1 if the docker update fails; 0 otherwise
scale_down() {
  local service_name="$1"
  local current_replicas="$2"
  local new_replicas=$((current_replicas - 1))

  if [[ $new_replicas -ge $MIN_REPLICAS ]]; then
    # BUGFIX: the transition separator was missing from the message
    # ("32" instead of "3 -> 2"); make the replica change explicit.
    log "🔽 Scaling DOWN $service_name: $current_replicas -> $new_replicas replicas"
    docker service update --replicas "$new_replicas" "$service_name" >/dev/null 2>&1 || {
      log "❌ Failed to scale down $service_name"
      return 1
    }
    log "✅ Successfully scaled down $service_name"
    # Record the scaling event for audit (CSV header written by main).
    echo "$(date -Iseconds),scale_down,$service_name,$current_replicas,$new_replicas,auto" >> "$PROJECT_ROOT/logs/scaling-events.csv"
  else
    log "⚠️ $service_name already at minimum replicas ($MIN_REPLICAS)"
  fi
}
# Decide whether a service needs scaling based on sampled metrics.
# Scales up when either CPU or memory exceeds its HIGH threshold; scales
# down only when both are under their LOW thresholds and replicas exceed
# the minimum. Otherwise just logs the current state.
# Arguments: $1 - service; $2 - CPU%; $3 - memory%; $4 - current replicas
evaluate_scaling() {
  local svc="$1"
  local cpu="$2"
  local mem="$3"
  local replicas="$4"

  # Truncate to integers for threshold comparison (drop fractional part).
  local cpu_int="${cpu%%.*}"
  local mem_int="${mem%%.*}"

  # High utilization on either axis triggers a scale-up.
  if [[ $cpu_int -gt $CPU_HIGH_THRESHOLD || $mem_int -gt $MEMORY_HIGH_THRESHOLD ]]; then
    log "📊 $svc metrics: CPU=${cpu}%, Memory=${mem}% - HIGH usage detected"
    scale_up "$svc" "$replicas"
    return
  fi

  # Low utilization on both axes (and headroom above the floor) triggers
  # a scale-down.
  if [[ $replicas -gt $MIN_REPLICAS && $cpu_int -lt $CPU_LOW_THRESHOLD && $mem_int -lt $MEMORY_LOW_THRESHOLD ]]; then
    log "📊 $svc metrics: CPU=${cpu}%, Memory=${mem}% - LOW usage detected"
    scale_down "$svc" "$replicas"
    return
  fi

  log "📊 $svc metrics: CPU=${cpu}%, Memory=${mem}%, Replicas=$replicas - OK"
}
# Time-of-day scaling: shrink non-critical services overnight and restore
# them in the morning. Intended to run as part of each evaluation cycle.
time_based_scaling() {
  local current_hour
  # BUGFIX: force base-10 conversion. 'date +%H' yields zero-padded hours
  # ("08", "09") which bash arithmetic treats as invalid octal, making
  # the [[ -ge ]] comparisons error out twice a day.
  current_hour=$((10#$(date +%H)))

  # Night hours (2 AM - 6 AM): scale down non-critical services.
  if [[ $current_hour -ge 2 && $current_hour -le 6 ]]; then
    local night_services=("paperless_paperless" "grafana_grafana")
    local service
    for service in "${night_services[@]}"; do
      local current_replicas
      current_replicas=$(get_replica_count "$service")
      if [[ $current_replicas -gt 1 ]]; then
        log "🌙 Night scaling: reducing $service to 1 replica (was $current_replicas)"
        # Best-effort: a failed update should not abort the cycle.
        docker service update --replicas 1 "$service" >/dev/null 2>&1 || true
        echo "$(date -Iseconds),night_scale_down,$service,$current_replicas,1,time_based" >> "$PROJECT_ROOT/logs/scaling-events.csv"
      fi
    done
  fi

  # Morning hour (7 AM): scale back up to two replicas.
  if [[ $current_hour -eq 7 ]]; then
    local morning_services=("paperless_paperless" "grafana_grafana")
    local service
    for service in "${morning_services[@]}"; do
      local current_replicas
      current_replicas=$(get_replica_count "$service")
      if [[ $current_replicas -lt 2 ]]; then
        log "🌅 Morning scaling: restoring $service to 2 replicas (was $current_replicas)"
        docker service update --replicas 2 "$service" >/dev/null 2>&1 || true
        echo "$(date -Iseconds),morning_scale_up,$service,$current_replicas,2,time_based" >> "$PROJECT_ROOT/logs/scaling-events.csv"
      fi
    done
  fi
}
# Write a YAML snapshot of per-service metrics and today's scaling
# activity to logs/scaling-report-YYYYMMDD.yaml.
generate_scaling_report() {
  log "Generating scaling report..."
  local report_file="$PROJECT_ROOT/logs/scaling-report-$(date +%Y%m%d).yaml"

  cat > "$report_file" << EOF
scaling_report:
  timestamp: "$(date -Iseconds)"
  evaluation_cycle: $(date +%Y%m%d-%H%M%S)
  current_state:
EOF

  # Append the live state of every managed service.
  local service
  for service in "${SCALABLE_SERVICES[@]}"; do
    local metrics cpu_percent memory_percent replica_count status
    metrics=$(get_service_metrics "$service")
    read -r cpu_percent memory_percent replica_count <<< "$metrics"
    # BUGFIX: 'docker service ls' exits 0 even with no match, so the old
    # exit-code check always reported "running". Test the output instead.
    if [[ -n "$(docker service ls --filter "name=$service" --format '{{.Name}}' 2>/dev/null)" ]]; then
      status="running"
    else
      status="not_found"
    fi
    cat >> "$report_file" << EOF
    - service: "$service"
      replicas: $replica_count
      cpu_usage: "${cpu_percent}%"
      memory_usage: "${memory_percent}%"
      status: $status
EOF
  done

  # Count today's scaling events. BUGFIX: the old 'grep | wc -l || echo 0'
  # emitted "0" twice under pipefail when grep matched nothing. grep -c
  # prints the count (including 0) itself; swallow its exit status and
  # default to 0 when the CSV does not exist yet.
  local events_today
  events_today=$(grep -c "$(date +%Y-%m-%d)" "$PROJECT_ROOT/logs/scaling-events.csv" 2>/dev/null || true)
  events_today=${events_today:-0}

  cat >> "$report_file" << EOF
  daily_summary:
    scaling_events_today: $events_today
  thresholds:
    cpu_high: ${CPU_HIGH_THRESHOLD}%
    cpu_low: ${CPU_LOW_THRESHOLD}%
    memory_high: ${MEMORY_HIGH_THRESHOLD}%
    memory_low: ${MEMORY_LOW_THRESHOLD}%
  limits:
    max_replicas: $MAX_REPLICAS
    min_replicas: $MIN_REPLICAS
EOF

  log "✅ Scaling report generated: $report_file"
}
# Generate a systemd unit plus a polling-loop script for continuous
# scaling. The unit is written to /tmp for manual installation (root).
setup_monitoring() {
  log "Setting up dynamic scaling monitoring..."

  # Systemd unit for continuous monitoring. BUGFIX: the heredoc is now
  # unquoted so ExecStart points at this script's real location instead
  # of a hardcoded /home/jonathan/... path.
  cat > /tmp/docker-autoscaler.service << EOF
[Unit]
Description=Docker Swarm Auto Scaler
After=docker.service
Requires=docker.service

[Service]
Type=simple
ExecStart=$SCRIPT_DIR/dynamic-resource-scaling.sh --monitor
Restart=always
RestartSec=60
User=root

[Install]
WantedBy=multi-user.target
EOF

  # Standalone monitoring loop: evaluate every 5 minutes. Quoted
  # delimiter keeps the embedded expansions literal for the generated
  # script rather than expanding them here.
  cat > "$PROJECT_ROOT/scripts/scaling-monitor-loop.sh" << 'EOF'
#!/bin/bash
# Continuous monitoring loop for dynamic scaling
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

while true; do
  # Run scaling evaluation
  ./dynamic-resource-scaling.sh --evaluate
  # Wait 5 minutes between evaluations
  sleep 300
done
EOF

  chmod +x "$PROJECT_ROOT/scripts/scaling-monitor-loop.sh"
  log "✅ Monitoring scripts created"
  log "⚠️ To enable: sudo cp /tmp/docker-autoscaler.service /etc/systemd/system/ && sudo systemctl enable --now docker-autoscaler"
}
# Entry point: dispatch on the first argument.
# BUGFIX: the default value must include the leading dashes. The old
# "${1:-evaluate}" never matched the "--evaluate" pattern, so a bare
# invocation fell into the unknown-option branch, where an unset $1
# then aborted the script under 'set -u'.
main() {
  case "${1:---evaluate}" in
    "--evaluate")
      log "🔍 Starting dynamic scaling evaluation..."
      # Initialize the audit CSV with a header on first run.
      if [[ ! -f "$PROJECT_ROOT/logs/scaling-events.csv" ]]; then
        echo "timestamp,action,service,old_replicas,new_replicas,trigger" > "$PROJECT_ROOT/logs/scaling-events.csv"
      fi
      # Evaluate each managed service. BUGFIX: 'docker service ls' exits
      # 0 even with no match, so test its output rather than its status.
      local service
      for service in "${SCALABLE_SERVICES[@]}"; do
        if [[ -n "$(docker service ls --filter "name=$service" --format '{{.Name}}' 2>/dev/null)" ]]; then
          local metrics cpu_percent memory_percent current_replicas
          metrics=$(get_service_metrics "$service")
          read -r cpu_percent memory_percent current_replicas <<< "$metrics"
          evaluate_scaling "$service" "$cpu_percent" "$memory_percent" "$current_replicas"
        else
          log "⚠️ Service not found: $service"
        fi
      done
      # Apply time-based scaling, then snapshot the state.
      time_based_scaling
      generate_scaling_report
      ;;
    "--monitor")
      log "🔄 Starting continuous monitoring mode..."
      # BUGFIX: use the absolute path — the relative ./ invocation broke
      # whenever the CWD was not the scripts directory (e.g. systemd).
      while true; do
        "$SCRIPT_DIR/dynamic-resource-scaling.sh" --evaluate
        sleep 300 # 5-minute intervals
      done
      ;;
    "--setup")
      setup_monitoring
      ;;
    "--status")
      log "📊 Current service status:"
      local service
      for service in "${SCALABLE_SERVICES[@]}"; do
        if [[ -n "$(docker service ls --filter "name=$service" --format '{{.Name}}' 2>/dev/null)" ]]; then
          local metrics cpu_percent memory_percent current_replicas
          metrics=$(get_service_metrics "$service")
          read -r cpu_percent memory_percent current_replicas <<< "$metrics"
          log "  $service: ${current_replicas} replicas, CPU=${cpu_percent}%, Memory=${memory_percent}%"
        else
          log "  $service: not found"
        fi
      done
      ;;
    "--help"|"-h")
      cat << 'EOF'
Dynamic Resource Scaling Automation

USAGE:
  dynamic-resource-scaling.sh [OPTIONS]

OPTIONS:
  --evaluate    Run single scaling evaluation (default)
  --monitor     Start continuous monitoring mode
  --setup       Set up systemd service for continuous monitoring
  --status      Show current status of all scalable services
  --help, -h    Show this help message

EXAMPLES:
  # Single evaluation
  ./dynamic-resource-scaling.sh --evaluate

  # Check current status
  ./dynamic-resource-scaling.sh --status

  # Set up continuous monitoring
  ./dynamic-resource-scaling.sh --setup

CONFIGURATION:
  Edit the script to modify:
  - CPU_HIGH_THRESHOLD: Scale up when CPU > 80%
  - CPU_LOW_THRESHOLD: Scale down when CPU < 20%
  - MEMORY_HIGH_THRESHOLD: Scale up when Memory > 85%
  - MEMORY_LOW_THRESHOLD: Scale down when Memory < 30%
  - MAX_REPLICAS: Maximum replicas per service (5)
  - MIN_REPLICAS: Minimum replicas per service (1)

NOTES:
  - Requires Docker Swarm mode
  - Monitors CPU and memory usage
  - Includes time-based scaling for night hours
  - Logs all scaling events for audit
  - Safe scaling with min/max limits
EOF
      ;;
    *)
      # ${1:-} guards 'set -u' if dispatch is reached with no argument.
      log "❌ Unknown option: ${1:-}"
      log "Use --help for usage information"
      exit 1
      ;;
  esac
}
# Check dependencies: 'bc' is required for the floating-point math in
# get_service_metrics / the averaging pipeline.
# NOTE(review): the auto-install assumes a Debian/Ubuntu host (apt-get)
# and working sudo; on other distros this branch fails and asks for a
# manual install — confirm target hosts are apt-based.
if ! command -v bc >/dev/null 2>&1; then
log "Installing bc for calculations..."
sudo apt-get update && sudo apt-get install -y bc || {
log "❌ Failed to install bc. Please install manually."
exit 1
}
fi
# Execute main function with all CLI arguments (default: --evaluate).
main "$@"