1795 lines
52 KiB
Bash
Executable File
1795 lines
52 KiB
Bash
Executable File
#!/bin/bash
set -euo pipefail

# Storage Performance Optimizer
# Optimizes storage performance with SSD caching, database tuning, and I/O optimization
# Part of the Migration Issues Resolution Framework
#
# Must be run as root (enforced in validate_prerequisites). Debian/Ubuntu
# specific: installs missing tools via apt-get.

# Source the error handling library (provides log_info/log_warn/log_error,
# init_logging, register_cleanup — all used throughout this script).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib/error_handling.sh"

# Configuration
# Timestamped log file one level above the script directory.
# NOTE(review): assumes init_logging creates ../logs if missing — confirm.
readonly LOG_FILE="${SCRIPT_DIR}/../logs/storage_optimization_$(date +%Y%m%d_%H%M%S).log"
# Where pre-modification backups of system/service configs are stored.
readonly CONFIG_BACKUP_DIR="${SCRIPT_DIR}/../backups/storage_configs"
# Root of the docker-compose project; per-service config dirs are created under it.
readonly DOCKER_COMPOSE_DIR="${SCRIPT_DIR}/../../"

# Initialize logging
init_logging "$LOG_FILE"
|
|
|
|
#######################################
# Entry point: runs the full storage optimization pipeline in order.
# Globals:   none written directly (steps set their own)
# Arguments: none
# Returns:   0 on success; any failing step aborts via set -e
#######################################
main() {
    log_info "Starting storage performance optimization"

    # Ensure partial state is cleaned up on any exit path.
    register_cleanup cleanup_on_exit

    # Pipeline, in dependency order: prerequisites -> analysis -> SSD
    # caching -> database tuning -> filesystem tuning -> monitoring ->
    # benchmarking tools -> Docker volume tuning.
    validate_prerequisites
    analyze_storage_configuration
    setup_ssd_caching
    optimize_database_storage
    configure_filesystem_optimizations
    setup_storage_monitoring
    create_performance_testing_tools
    optimize_docker_volumes

    log_info "Storage performance optimization completed successfully"
}
|
|
|
|
#######################################
# Validates that all required tools exist and that we run as root;
# installs optional benchmarking/caching tools (fio, bcache-tools).
# Improvement over the original: reports ALL missing commands in one
# pass instead of exiting on the first, and runs apt-get update once.
# Globals:   EUID (read)
# Arguments: none
# Returns:   exits 1 if a required command is missing or not running as root
#######################################
validate_prerequisites() {
    log_info "Validating storage optimization prerequisites"

    local required_commands=(
        "docker" "lsblk" "df" "iostat" "iotop" "smartctl" "tune2fs"
    )

    # Collect every missing command so the operator sees the full list at once.
    local cmd
    local missing=()
    for cmd in "${required_commands[@]}"; do
        if ! command -v "$cmd" &>/dev/null; then
            missing+=("$cmd")
        fi
    done
    if (( ${#missing[@]} > 0 )); then
        log_error "Required command not found: ${missing[*]}"
        exit 1
    fi

    # Root is required: we write to /sys, /etc and restart services.
    if [[ $EUID -ne 0 ]]; then
        log_error "This script must be run as root or with sudo"
        exit 1
    fi

    # Install optional tools in a single apt transaction if any are missing.
    local packages=()
    if ! command -v fio &>/dev/null; then
        log_info "Installing fio for storage benchmarking"
        packages+=(fio)
    fi
    if ! command -v bcache-super-show &>/dev/null; then
        log_info "Installing bcache tools"
        packages+=(bcache-tools)
    fi
    if (( ${#packages[@]} > 0 )); then
        apt-get update && apt-get install -y "${packages[@]}"
    fi

    log_info "Prerequisites validation completed"
}
|
|
|
|
#######################################
# Writes a storage-analysis report (block devices, usage, I/O stats,
# SMART health, mount options, Docker storage info) and then detects
# SSD/HDD devices.
# Fixes: under `set -e -o pipefail` a grep with no matches exits 1 and
# aborted the entire script mid-report; those greps are now guarded.
# Also removes the useless `cat | grep` on /proc/mounts.
# Globals:   CONFIG_BACKUP_DIR (read)
# Arguments: none
# Outputs:   report file under CONFIG_BACKUP_DIR
#######################################
analyze_storage_configuration() {
    log_info "Analyzing current storage configuration"

    local storage_report="${CONFIG_BACKUP_DIR}/storage_analysis_$(date +%Y%m%d_%H%M%S).txt"
    mkdir -p "$(dirname "$storage_report")"

    {
        echo "Storage Configuration Analysis Report"
        echo "Generated: $(date)"
        echo "====================================="
        echo

        echo "Block Devices:"
        lsblk -o NAME,SIZE,TYPE,MOUNTPOINT,FSTYPE,UUID
        echo

        echo "Filesystem Usage:"
        df -h
        echo

        echo "Disk I/O Statistics:"
        iostat -x 1 3 || echo "iostat not available"
        echo

        echo "SMART Status for all drives:"
        # Unmatched globs expand literally, but the -b test filters them out.
        local drive
        for drive in /dev/sd* /dev/nvme*; do
            if [[ -b "$drive" ]]; then
                echo "Drive: $drive"
                smartctl -H "$drive" 2>/dev/null || echo "SMART not available for $drive"
                echo
            fi
        done

        echo "Current mount options:"
        # '|| true': no matching filesystems must not abort the script (set -e).
        grep -E "(ext4|xfs|btrfs|zfs)" /proc/mounts || true
        echo

        echo "Docker storage driver:"
        # '|| true': guard against pipefail aborting when grep finds nothing.
        docker info | grep -E "(Storage Driver|Backing Filesystem)" || true
        echo

        echo "Docker volume list:"
        docker volume ls
        echo

        if command -v zpool &>/dev/null; then
            echo "ZFS pools:"
            zpool status || echo "No ZFS pools found"
            echo
        fi

    } > "$storage_report"

    log_info "Storage analysis report saved to: $storage_report"

    # Populate SSD_DEVICES / HDD_DEVICES and friends for later steps.
    detect_storage_configuration
}
|
|
|
|
#######################################
# Classifies block devices into SSDs and HDDs via the kernel's
# rotational flag and records root/data filesystem information.
# Fixes: replaces the per-device `echo | cut` subshells with parameter
# expansion; guards the `df -t ...` call, which exits non-zero when no
# filesystem matches and would abort under set -e. Note that bash
# cannot export arrays (the original `export SSD_DEVICES` was a silent
# no-op), so the arrays are simply left as global shell variables.
# Globals:   SSD_DEVICES, HDD_DEVICES, ROOT_FS, DATA_MOUNT_POINTS (written)
# Arguments: none
#######################################
detect_storage_configuration() {
    log_info "Detecting storage configuration"

    SSD_DEVICES=()
    HDD_DEVICES=()

    local rotational_file device_name
    for rotational_file in /sys/block/*/queue/rotational; do
        [[ -r "$rotational_file" ]] || continue
        # /sys/block/<name>/queue/rotational -> <name>
        device_name=${rotational_file#/sys/block/}
        device_name=${device_name%%/*}
        # rotational == 0 means non-rotational media, i.e. an SSD/NVMe.
        if [[ $(<"$rotational_file") -eq 0 ]]; then
            SSD_DEVICES+=("/dev/$device_name")
            log_info "Detected SSD: /dev/$device_name"
        else
            HDD_DEVICES+=("/dev/$device_name")
            log_info "Detected HDD: /dev/$device_name"
        fi
    done

    # Detect filesystem types.
    ROOT_FS=$(findmnt -n -o FSTYPE /)
    # df exits non-zero when no listed fs type is mounted; don't abort (set -e).
    DATA_MOUNT_POINTS=$(df -t ext4 -t xfs -t btrfs -t zfs --output=target 2>/dev/null | tail -n +2) || true

    # Scalars can be exported; arrays cannot — they remain global in this shell.
    export ROOT_FS DATA_MOUNT_POINTS

    log_info "Storage detection completed - SSDs: ${#SSD_DEVICES[@]}, HDDs: ${#HDD_DEVICES[@]}"
}
|
|
|
|
#######################################
# Orchestrates SSD caching: bcache guide (when SSD+HDD both exist),
# filesystem-level caching, and Docker volume caching.
# Globals:   SSD_DEVICES, HDD_DEVICES (read)
# Arguments: none
# Returns:   0; exits early when no SSD is present
#######################################
setup_ssd_caching() {
    log_info "Setting up SSD caching"

    # Without at least one solid-state device there is nothing to cache with.
    if (( ${#SSD_DEVICES[@]} == 0 )); then
        log_warn "No SSDs detected - skipping SSD caching setup"
        return 0
    fi

    # bcache needs both a fast cache device and a slow backing device.
    # (An SSD is guaranteed present past the guard above.)
    if (( ${#HDD_DEVICES[@]} > 0 )); then
        setup_bcache_caching
    fi

    # These apply regardless of whether bcache is an option.
    configure_filesystem_caching
    setup_docker_volume_caching
}
|
|
|
|
#######################################
# Writes a bcache setup guide (markdown) into CONFIG_BACKUP_DIR and
# appends an analysis of the devices detected on this host. Does NOT
# perform any bcache operations itself — setup is destructive and is
# deliberately left as a documented manual step.
# Globals:   CONFIG_BACKUP_DIR, SSD_DEVICES, HDD_DEVICES (read)
# Arguments: none
# Outputs:   ${CONFIG_BACKUP_DIR}/bcache_setup_guide.md
# NOTE(review): with both arrays empty, ${SSD_DEVICES[*]} errors under
# `set -u` on bash < 4.4 — callers only invoke this when both are
# non-empty, but confirm the minimum bash version.
#######################################
setup_bcache_caching() {
    log_info "Setting up bcache SSD caching"

    # Note: bcache setup requires careful consideration and testing
    # This implementation provides the framework but requires manual validation

    # Quoted 'EOF' delimiter: the guide body is written verbatim, no expansion.
    cat > "${CONFIG_BACKUP_DIR}/bcache_setup_guide.md" << 'EOF'
# Bcache SSD Caching Setup Guide

**WARNING**: Bcache setup can be destructive. Test in staging environment first.

## Prerequisites
- At least one SSD and one HDD
- Data backed up
- System in maintenance mode

## Setup Steps

1. **Identify devices**:
```bash
# SSD for cache (will be wiped)
CACHE_DEVICE="/dev/sdb"
# HDD for backing device (will be wiped)
BACKING_DEVICE="/dev/sdc"
```

2. **Create bcache devices**:
```bash
# Make cache device
make-bcache -C $CACHE_DEVICE

# Make backing device
make-bcache -B $BACKING_DEVICE

# Register cache
echo $CACHE_DEVICE > /sys/fs/bcache/register
echo $BACKING_DEVICE > /sys/fs/bcache/register
```

3. **Attach cache to backing device**:
```bash
# Find bcache device
BCACHE_DEVICE=$(ls /dev/bcache*)

# Attach cache
echo $(ls /sys/fs/bcache/ | grep -E "^[0-9a-f-]+$" | head -1) > \
    /sys/block/$(basename $BCACHE_DEVICE)/bcache/attach
```

4. **Configure caching policy**:
```bash
# Set to writeback mode for better performance
echo writeback > /sys/block/$(basename $BCACHE_DEVICE)/bcache/cache_mode
```

## Current System Analysis
EOF

    # Add current system analysis to the guide (this part DOES expand vars).
    {
        echo
        echo "### Current Storage Devices"
        echo "SSDs detected: ${SSD_DEVICES[*]}"
        echo "HDDs detected: ${HDD_DEVICES[*]}"
        echo
        echo "### Recommended Configuration"
        if [[ ${#SSD_DEVICES[@]} -gt 0 ]] && [[ ${#HDD_DEVICES[@]} -gt 0 ]]; then
            echo "- Use ${SSD_DEVICES[0]} as cache device"
            echo "- Use ${HDD_DEVICES[0]} as backing device"
            echo "- This will provide SSD-accelerated storage for Docker volumes"
        else
            echo "- Not enough devices for bcache setup"
            echo "- Consider filesystem-level optimizations instead"
        fi
    } >> "${CONFIG_BACKUP_DIR}/bcache_setup_guide.md"

    log_info "Bcache setup guide created at: ${CONFIG_BACKUP_DIR}/bcache_setup_guide.md"
    log_warn "Manual bcache setup required - see guide for details"
}
|
|
|
|
#######################################
# Tunes per-device read-ahead and I/O scheduler for every detected
# disk, and persists the settings via a udev rules file.
# Fixes: the SSD and HDD scheduler branches were identical duplicated
# code (both set mq-deadline) — deduplicated; sysfs writes can fail
# (virtual devices, read-only sysfs) and would abort under set -e —
# now guarded; the success log no longer prints after a failed write.
# Globals:   SSD_DEVICES, HDD_DEVICES (read)
# Arguments: none
# Outputs:   /etc/udev/rules.d/60-storage-optimization.rules
#######################################
configure_filesystem_caching() {
    log_info "Configuring filesystem-level caching optimizations"

    local device device_name kind ra_kb
    for device in "${SSD_DEVICES[@]}" "${HDD_DEVICES[@]}"; do
        device_name=$(basename "$device")

        # SSDs: small read-ahead (random access is cheap).
        # HDDs: large read-ahead (helps sequential throughput).
        if [[ " ${SSD_DEVICES[*]} " == *" ${device} "* ]]; then
            kind="SSD" ra_kb=256
        else
            kind="HDD" ra_kb=1024
        fi

        if echo "$ra_kb" > "/sys/block/$device_name/queue/read_ahead_kb" 2>/dev/null; then
            log_info "Set $kind read-ahead to ${ra_kb}KB for $device"
        else
            log_warn "Could not set read-ahead for $device"
        fi

        # mq-deadline suits both SSDs and HDDs for these workloads; fall back
        # to legacy 'deadline' on older kernels.
        if echo "mq-deadline" > "/sys/block/$device_name/queue/scheduler" 2>/dev/null || \
           echo "deadline" > "/sys/block/$device_name/queue/scheduler" 2>/dev/null; then
            log_info "Set mq-deadline scheduler for $kind $device"
        else
            log_warn "Could not set I/O scheduler for $device"
        fi
    done

    # Make read-ahead and scheduler settings persistent across reboots.
    cat > "/etc/udev/rules.d/60-storage-optimization.rules" << 'EOF'
# Storage optimization rules
# SSD read-ahead optimization
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="0", ATTR{queue/read_ahead_kb}="256"
ACTION=="add|change", KERNEL=="nvme[0-9]n[0-9]", ATTR{queue/read_ahead_kb}="256"

# HDD read-ahead optimization
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="1", ATTR{queue/read_ahead_kb}="1024"

# I/O scheduler optimization
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="0", ATTR{queue/scheduler}="mq-deadline"
ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="1", ATTR{queue/scheduler}="mq-deadline"
ACTION=="add|change", KERNEL=="nvme[0-9]n[0-9]", ATTR{queue/scheduler}="none"
EOF

    # Reload udev rules so the new file takes effect immediately.
    udevadm control --reload-rules && udevadm trigger

    log_info "Filesystem caching optimizations applied"
}
|
|
|
|
#######################################
# Merges storage optimizations into /etc/docker/daemon.json (via a
# Python helper, to edit JSON safely), generates an on-demand volume
# optimization script, and restarts Docker to apply the changes.
# Fixes: ensures /etc/docker exists before writing daemon.json; the
# Python helper's bare `except:` is narrowed to the errors it actually
# recovers from (unreadable file / invalid JSON) so e.g. KeyboardInterrupt
# is no longer swallowed.
# Globals:   CONFIG_BACKUP_DIR, SCRIPT_DIR (read)
# Arguments: none
# Outputs:   /etc/docker/daemon.json, ${SCRIPT_DIR}/optimize_volume_mounts.sh
#######################################
setup_docker_volume_caching() {
    log_info "Setting up Docker volume caching optimizations"

    local docker_daemon_config="/etc/docker/daemon.json"
    local backup_file="${CONFIG_BACKUP_DIR}/daemon.json.backup.$(date +%Y%m%d_%H%M%S)"

    mkdir -p "$(dirname "$backup_file")"
    # /etc/docker may not exist on a fresh host; the Python helper would
    # otherwise fail to open daemon.json for writing.
    mkdir -p "$(dirname "$docker_daemon_config")"

    if [[ -f "$docker_daemon_config" ]]; then
        cp "$docker_daemon_config" "$backup_file"
        log_info "Docker daemon config backed up to: $backup_file"
    fi

    # Create optimized Docker daemon configuration (JSON-safe merge).
    python3 << 'EOF'
import json
import os

daemon_config_path = "/etc/docker/daemon.json"
config = {}

# Load existing config if it exists; start fresh on unreadable/invalid JSON.
if os.path.exists(daemon_config_path):
    try:
        with open(daemon_config_path, 'r') as f:
            config = json.load(f)
    except (OSError, ValueError):
        config = {}

# Add storage optimizations
config.update({
    "storage-driver": "overlay2",
    "storage-opts": [
        "overlay2.override_kernel_check=true"
    ],
    "log-driver": "json-file",
    "log-opts": {
        "max-size": "10m",
        "max-file": "3"
    },
    "data-root": "/var/lib/docker",
    "exec-opts": ["native.cgroupdriver=systemd"],
    "live-restore": True,
    "userland-proxy": False,
    "experimental": False
})

# Write updated config
with open(daemon_config_path, 'w') as f:
    json.dump(config, f, indent=2)

print("Docker daemon configuration updated")
EOF

    # Create optimized volume mount options script (written verbatim).
    cat > "${SCRIPT_DIR}/optimize_volume_mounts.sh" << 'EOF'
#!/bin/bash
# Docker Volume Mount Optimization Script

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib/error_handling.sh"

optimize_volume_mounts() {
    log_info "Optimizing Docker volume mount options"

    # Get all Docker volumes
    local volumes=$(docker volume ls -q)

    for volume in $volumes; do
        local volume_path=$(docker volume inspect "$volume" --format '{{.Mountpoint}}')

        if [[ -d "$volume_path" ]]; then
            # Optimize directory for database workloads if it contains database files
            if [[ "$volume" =~ (postgres|mysql|mongodb|redis|influx) ]]; then
                log_info "Optimizing database volume: $volume"

                # Set optimal ownership and permissions
                chown -R 999:999 "$volume_path" 2>/dev/null || true

                # For ext4 filesystems, set optimal attributes
                if findmnt -n -o FSTYPE "$volume_path" | grep -q ext4; then
                    # Enable extent attributes for large files
                    find "$volume_path" -type f -size +100M -exec chattr +e {} \; 2>/dev/null || true
                fi
            fi

            # Set optimal permissions for all volumes
            chmod 755 "$volume_path"
        fi
    done

    log_info "Volume mount optimization completed"
}

optimize_volume_mounts
EOF

    chmod +x "${SCRIPT_DIR}/optimize_volume_mounts.sh"

    # Restart Docker to apply daemon.json changes (brief service interruption).
    systemctl restart docker

    log_info "Docker volume caching optimizations completed"
}
|
|
|
|
#######################################
# Generates tuned configurations for each datastore, then sets up
# connection pooling in front of PostgreSQL.
# Arguments: none
#######################################
optimize_database_storage() {
    log_info "Optimizing database storage configurations"

    # Each step writes config files only; order matches the original:
    # PostgreSQL, Redis, InfluxDB, then PgBouncer pooling.
    local step
    for step in \
        create_postgresql_optimizations \
        create_redis_optimizations \
        create_influxdb_optimizations \
        setup_connection_pooling; do
        "$step"
    done
}
|
|
|
|
#######################################
# Writes a memory-proportional postgresql.conf and a docker-compose
# override for an optimized PostgreSQL service. Nothing is applied to
# a running database — files are generated for later deployment.
# Globals:   DOCKER_COMPOSE_DIR (read)
# Arguments: none
# Outputs:   postgresql.conf.optimized, docker-compose.postgres-optimized.yml
#######################################
create_postgresql_optimizations() {
    log_info "Creating PostgreSQL storage optimizations"

    local postgres_config_dir="${DOCKER_COMPOSE_DIR}/postgres"
    mkdir -p "$postgres_config_dir"

    # Get system memory for calculations
    local total_memory_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    local total_memory_mb=$((total_memory_kb / 1024))

    # Calculate optimal PostgreSQL settings.
    # NOTE(review): on hosts with < 64MB RAM work_mem computes to 0MB,
    # which PostgreSQL rejects — confirm minimum supported host size.
    local shared_buffers=$((total_memory_mb / 4)) # 25% of total memory
    local effective_cache_size=$((total_memory_mb * 3 / 4)) # 75% of total memory
    local work_mem=$((total_memory_mb / 64)) # Conservative work_mem
    local maintenance_work_mem=$((total_memory_mb / 16)) # For maintenance operations

    # Unquoted EOF: the heredoc expands $(date) and the *_mem variables.
    cat > "${postgres_config_dir}/postgresql.conf.optimized" << EOF
# PostgreSQL Optimized Configuration
# Generated: $(date)
# System Memory: ${total_memory_mb}MB

# Memory Configuration
shared_buffers = ${shared_buffers}MB
effective_cache_size = ${effective_cache_size}MB
work_mem = ${work_mem}MB
maintenance_work_mem = ${maintenance_work_mem}MB

# Storage and I/O Configuration
wal_buffers = 16MB
checkpoint_completion_target = 0.9
checkpoint_timeout = 15min
max_wal_size = 2GB
min_wal_size = 512MB

# Connection Configuration
max_connections = 200
shared_preload_libraries = 'pg_stat_statements'

# Query Planning
default_statistics_target = 100
random_page_cost = 1.1 # Optimized for SSD
seq_page_cost = 1.0

# Write-Ahead Logging
wal_compression = on
wal_log_hints = on
full_page_writes = on

# Performance and Monitoring
track_activities = on
track_counts = on
track_io_timing = on
track_functions = all
log_min_duration_statement = 1000 # Log queries over 1 second

# Vacuum and Autovacuum
autovacuum = on
autovacuum_max_workers = 3
autovacuum_naptime = 30s
autovacuum_vacuum_scale_factor = 0.1
autovacuum_analyze_scale_factor = 0.05

# Replication (if using)
wal_level = replica
archive_mode = off

# Background Writer
bgwriter_delay = 200ms
bgwriter_lru_maxpages = 100
bgwriter_lru_multiplier = 2.0
EOF

    # Create Docker Compose override for PostgreSQL (verbatim, no expansion).
    cat > "${postgres_config_dir}/docker-compose.postgres-optimized.yml" << 'EOF'
version: '3.8'
services:
  postgres:
    image: postgres:15-alpine
    container_name: postgres_optimized
    restart: unless-stopped
    environment:
      - POSTGRES_DB=homelab
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD_FILE=/run/secrets/postgres_password
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./postgresql.conf.optimized:/etc/postgresql/postgresql.conf
      - /dev/shm:/dev/shm # Shared memory optimization
    command: postgres -c config_file=/etc/postgresql/postgresql.conf
    deploy:
      resources:
        limits:
          memory: 1G
        reservations:
          memory: 512M
    secrets:
      - postgres_password
    networks:
      - data_network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U postgres"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  postgres_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/homelab/postgres/data

secrets:
  postgres_password:
    external: true

networks:
  data_network:
    external: true
EOF

    log_info "PostgreSQL optimization configuration created"
}
|
|
|
|
#######################################
# Writes a tuned redis.conf and a docker-compose override for an
# optimized Redis service. Files are generated only, not applied.
# Globals:   DOCKER_COMPOSE_DIR (read)
# Arguments: none
# Outputs:   redis.conf.optimized, docker-compose.redis-optimized.yml
# NOTE(review): the generated conf carries the placeholder password
# "changeme_use_docker_secrets" — confirm it is replaced at deploy time.
#######################################
create_redis_optimizations() {
    log_info "Creating Redis storage optimizations"

    local redis_config_dir="${DOCKER_COMPOSE_DIR}/redis"
    mkdir -p "$redis_config_dir"

    # Quoted 'EOF': content written verbatim, no shell expansion.
    cat > "${redis_config_dir}/redis.conf.optimized" << 'EOF'
# Redis Optimized Configuration
# Generated for high-performance home lab setup

# Basic Configuration
daemonize no
port 6379
bind 0.0.0.0
protected-mode yes
requirepass changeme_use_docker_secrets

# Memory Management
maxmemory 256mb
maxmemory-policy allkeys-lru
maxmemory-samples 5

# Persistence Configuration (balanced between performance and durability)
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb

# AOF Configuration (for better durability)
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb

# Network and Connection Configuration
timeout 300
tcp-keepalive 300
tcp-backlog 511

# Performance Tuning
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
list-compress-depth 0
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64

# Logging
loglevel notice
logfile ""
syslog-enabled no

# Advanced Configuration
databases 16
latency-monitor-threshold 100
notify-keyspace-events ""

# Security
rename-command FLUSHDB ""
rename-command FLUSHALL ""
rename-command DEBUG ""
rename-command CONFIG "CONFIG_a83b9c74d0e3f2a1"
EOF

    # Create Docker Compose override for Redis (verbatim).
    cat > "${redis_config_dir}/docker-compose.redis-optimized.yml" << 'EOF'
version: '3.8'
services:
  redis:
    image: redis:7-alpine
    container_name: redis_optimized
    restart: unless-stopped
    volumes:
      - redis_data:/data
      - ./redis.conf.optimized:/etc/redis/redis.conf
      - /dev/shm:/dev/shm
    command: redis-server /etc/redis/redis.conf
    deploy:
      resources:
        limits:
          memory: 512M
        reservations:
          memory: 256M
    networks:
      - data_network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  redis_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/homelab/redis/data

networks:
  data_network:
    external: true
EOF

    log_info "Redis optimization configuration created"
}
|
|
|
|
#######################################
# Writes a tuned influxdb.conf (InfluxDB 1.x TOML format) and a
# docker-compose override for an optimized InfluxDB service.
# Globals:   DOCKER_COMPOSE_DIR (read)
# Arguments: none
# Outputs:   influxdb.conf.optimized, docker-compose.influxdb-optimized.yml
#######################################
create_influxdb_optimizations() {
    log_info "Creating InfluxDB storage optimizations"

    local influxdb_config_dir="${DOCKER_COMPOSE_DIR}/influxdb"
    mkdir -p "$influxdb_config_dir"

    # Quoted 'EOF': content written verbatim, no shell expansion.
    cat > "${influxdb_config_dir}/influxdb.conf.optimized" << 'EOF'
# InfluxDB Optimized Configuration
# Generated for time-series monitoring workloads

[meta]
dir = "/var/lib/influxdb/meta"
retention-autocreate = true
logging-enabled = true

[data]
dir = "/var/lib/influxdb/data"
wal-dir = "/var/lib/influxdb/wal"

# Storage engine settings
cache-max-memory-size = "256m"
cache-snapshot-memory-size = "25m"
cache-snapshot-write-cold-duration = "10m"

# Compaction settings
compact-full-write-cold-duration = "4h"
compact-throughput = "48m"
compact-throughput-burst = "48m"

# WAL settings
wal-fsync-delay = "0s"

# Query settings
query-timeout = "0s"
max-concurrent-queries = 0

# Series and measurement limits
max-series-per-database = 1000000
max-values-per-tag = 100000

[coordinator]
write-timeout = "10s"
max-concurrent-queries = 0
query-timeout = "0s"
log-queries-after = "0s"
max-select-point = 0
max-select-series = 0
max-select-buckets = 0

[retention]
enabled = true
check-interval = "30m"

[shard-precreation]
enabled = true
check-interval = "10m"
advance-period = "30m"

[monitor]
store-enabled = true
store-database = "_internal"
store-interval = "10s"

[admin]
enabled = false

[http]
enabled = true
bind-address = ":8086"
auth-enabled = false
log-enabled = true
write-tracing = false
pprof-enabled = true
debug-pprof-enabled = false
https-enabled = false

[logging]
format = "auto"
level = "info"
suppress-logo = false

[[graphite]]
enabled = false

[[collectd]]
enabled = false

[[opentsdb]]
enabled = false

[[udp]]
enabled = false
EOF

    # Create Docker Compose override for InfluxDB (verbatim).
    cat > "${influxdb_config_dir}/docker-compose.influxdb-optimized.yml" << 'EOF'
version: '3.8'
services:
  influxdb:
    image: influxdb:1.8-alpine
    container_name: influxdb_optimized
    restart: unless-stopped
    environment:
      - INFLUXDB_DB=homelab
      - INFLUXDB_ADMIN_USER=admin
      - INFLUXDB_ADMIN_PASSWORD_FILE=/run/secrets/influxdb_admin_password
      - INFLUXDB_USER=homelab
      - INFLUXDB_USER_PASSWORD_FILE=/run/secrets/influxdb_user_password
    volumes:
      - influxdb_data:/var/lib/influxdb
      - ./influxdb.conf.optimized:/etc/influxdb/influxdb.conf
      - /dev/shm:/dev/shm
    command: influxd -config /etc/influxdb/influxdb.conf
    ports:
      - "8086:8086"
    deploy:
      resources:
        limits:
          memory: 1G
        reservations:
          memory: 512M
    secrets:
      - influxdb_admin_password
      - influxdb_user_password
    networks:
      - monitoring_network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8086/ping"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  influxdb_data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /opt/homelab/influxdb/data

secrets:
  influxdb_admin_password:
    external: true
  influxdb_user_password:
    external: true

networks:
  monitoring_network:
    external: true
EOF

    log_info "InfluxDB optimization configuration created"
}
|
|
|
|
#######################################
# Writes a PgBouncer configuration (pgbouncer.ini) and a docker-compose
# service definition for transaction-level PostgreSQL connection pooling.
# Globals:   DOCKER_COMPOSE_DIR (read)
# Arguments: none
# Outputs:   pgbouncer.ini, docker-compose.pgbouncer.yml
# NOTE(review): the referenced auth_file /etc/pgbouncer/userlist.txt is
# not generated by this script — confirm it is provisioned elsewhere.
#######################################
setup_connection_pooling() {
    log_info "Setting up database connection pooling"

    local pooling_config_dir="${DOCKER_COMPOSE_DIR}/connection-pooling"
    mkdir -p "$pooling_config_dir"

    # Create PgBouncer configuration for PostgreSQL (verbatim, no expansion).
    cat > "${pooling_config_dir}/pgbouncer.ini" << 'EOF'
[databases]
homelab = host=postgres port=5432 dbname=homelab
immich = host=postgres port=5432 dbname=immich
nextcloud = host=postgres port=5432 dbname=nextcloud

[pgbouncer]
listen_addr = 0.0.0.0
listen_port = 5432
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt
admin_users = postgres
stats_users = postgres

# Connection pooling settings
pool_mode = transaction
max_client_conn = 1000
default_pool_size = 20
min_pool_size = 5
reserve_pool_size = 5
reserve_pool_timeout = 5
max_db_connections = 100
max_user_connections = 100

# Performance settings
server_reset_query = DISCARD ALL
server_check_query = select 1
server_check_delay = 30
max_packet_size = 2147483647

# Logging
log_connections = 1
log_disconnections = 1
log_pooler_errors = 1

# Timeouts
server_lifetime = 3600
server_idle_timeout = 600
client_idle_timeout = 0
client_login_timeout = 60
autodb_idle_timeout = 3600

# Security
ignore_startup_parameters = extra_float_digits

# Advanced
application_name_add_host = 1
EOF

    # Create PgBouncer Docker service. PgBouncer listens on 5432 inside the
    # container and is published as host port 6432.
    cat > "${pooling_config_dir}/docker-compose.pgbouncer.yml" << 'EOF'
version: '3.8'
services:
  pgbouncer:
    image: pgbouncer/pgbouncer:latest
    container_name: pgbouncer
    restart: unless-stopped
    environment:
      - DATABASES_HOST=postgres
      - DATABASES_PORT=5432
      - POOL_MODE=transaction
      - DEFAULT_POOL_SIZE=20
      - MAX_CLIENT_CONN=1000
    volumes:
      - ./pgbouncer.ini:/etc/pgbouncer/pgbouncer.ini
      - pgbouncer_logs:/var/log/pgbouncer
    ports:
      - "6432:5432"
    depends_on:
      - postgres
    deploy:
      resources:
        limits:
          memory: 256M
        reservations:
          memory: 128M
    networks:
      - data_network
    healthcheck:
      test: ["CMD", "psql", "-h", "localhost", "-p", "5432", "-U", "postgres", "-c", "SELECT 1"]
      interval: 30s
      timeout: 10s
      retries: 3

volumes:
  pgbouncer_logs:

networks:
  data_network:
    external: true
EOF

    log_info "Database connection pooling configuration created"
}
|
|
|
|
#######################################
# Runs the filesystem/kernel tuning sub-steps: ext4 mount options,
# swap behavior, transparent huge pages, and sysctl parameters.
# Globals:   CONFIG_BACKUP_DIR (read)
# Arguments: none
#######################################
configure_filesystem_optimizations() {
    log_info "Configuring filesystem-level optimizations"

    # Backup location for the configs the sub-steps modify.
    local fs_config_dir="${CONFIG_BACKUP_DIR}/filesystem"
    mkdir -p "$fs_config_dir"

    optimize_ext4_filesystems        # mount options / fstab
    configure_swap_optimization      # swappiness & cache pressure
    configure_transparent_hugepages  # THP off for database workloads
    configure_kernel_parameters      # sysctl I/O & network tuning
}
|
|
|
|
#######################################
# Remounts every ext4 filesystem with noatime and persists the option
# in /etc/fstab.
# Fixes: `findmnt -t ext4` exits non-zero when nothing matches and
# aborted the script under set -e — now guarded; /etc/fstab was
# re-backed-up inside the loop on every iteration (so later backups
# already contained earlier edits) — now backed up exactly once,
# before any modification.
# Globals:   CONFIG_BACKUP_DIR (read)
# Arguments: none
#######################################
optimize_ext4_filesystems() {
    log_info "Optimizing ext4 filesystems"

    # Find ext4 filesystems; empty result must not abort the script.
    local ext4_filesystems
    ext4_filesystems=$(findmnt -t ext4 -o TARGET --noheadings) || true

    # Single pre-modification fstab backup (the original backed up per loop
    # iteration, after earlier iterations had already edited the file).
    if [[ -n "$ext4_filesystems" ]]; then
        local fstab_backup="${CONFIG_BACKUP_DIR}/fstab.backup.$(date +%Y%m%d_%H%M%S)"
        mkdir -p "$(dirname "$fstab_backup")"
        cp /etc/fstab "$fstab_backup"
    fi

    local fs device current_opts
    for fs in $ext4_filesystems; do
        device=$(findmnt -n -o SOURCE "$fs")

        if [[ -b "$device" ]]; then
            log_info "Optimizing ext4 filesystem: $fs ($device)"

            # Remount with noatime if it is not already active.
            current_opts=$(findmnt -n -o OPTIONS "$fs")
            if ! grep -q "noatime" <<<"$current_opts"; then
                log_info "Adding noatime option to $fs"
                mount -o remount,noatime "$fs" || log_warn "Failed to add noatime to $fs"
            fi

            # Persist noatime in fstab (best-effort pattern rewrite).
            sed -i "s|$device.*ext4.*defaults|$device $fs ext4 defaults,noatime|g" /etc/fstab || \
                log_warn "Could not update fstab for $device"
        fi
    done

    log_info "ext4 filesystem optimization completed"
}
|
|
|
|
#######################################
# Lowers swappiness to 10 and vfs_cache_pressure to 50 (favoring
# application memory and inode/dentry caches) and persists both.
# Fixes: the original appended to /etc/sysctl.conf unconditionally,
# duplicating entries on every run — persistence is now idempotent.
# Arguments: none
#######################################
configure_swap_optimization() {
    log_info "Configuring swap optimization"

    # Configure swappiness for better performance.
    local current_swappiness
    current_swappiness=$(cat /proc/sys/vm/swappiness)

    if [[ "$current_swappiness" -ne 10 ]]; then
        echo 10 > /proc/sys/vm/swappiness
        log_info "Set swappiness to 10 (was $current_swappiness)"
    fi

    # Persist only once — reruns previously kept appending duplicates.
    grep -q '^vm.swappiness=10$' /etc/sysctl.conf || \
        echo "vm.swappiness=10" >> /etc/sysctl.conf

    # Configure vfs_cache_pressure (50 = keep inode/dentry caches longer).
    echo 50 > /proc/sys/vm/vfs_cache_pressure
    grep -q '^vm.vfs_cache_pressure=50$' /etc/sysctl.conf || \
        echo "vm.vfs_cache_pressure=50" >> /etc/sysctl.conf

    log_info "Swap optimization completed"
}
|
|
|
|
#######################################
# Disables transparent huge pages (often better latency for database
# workloads) and persists the setting via /etc/rc.local.
# Fixes: the original appended its snippet to rc.local on EVERY run,
# duplicating the block and injecting an early `exit 0` that would cut
# off any commands already in the file — the append is now guarded so
# it happens at most once.
# Arguments: none
#######################################
configure_transparent_hugepages() {
    log_info "Configuring transparent huge pages"

    # Disable THP for database workloads (often better performance).
    if [[ -f /sys/kernel/mm/transparent_hugepage/enabled ]]; then
        echo never > /sys/kernel/mm/transparent_hugepage/enabled
        echo never > /sys/kernel/mm/transparent_hugepage/defrag

        # Persist across reboots, but write the snippet only once.
        # (-s: a missing rc.local is treated as "not yet present".)
        if ! grep -qs 'transparent_hugepage' /etc/rc.local; then
            cat >> /etc/rc.local << 'EOF'
#!/bin/bash
# Disable transparent huge pages for database performance
echo never > /sys/kernel/mm/transparent_hugepage/enabled
echo never > /sys/kernel/mm/transparent_hugepage/defrag
exit 0
EOF
            chmod +x /etc/rc.local
        fi

        log_info "Disabled transparent huge pages for database workloads"
    fi
}
|
|
|
|
#######################################
# Appends storage/network/filesystem sysctl tuning to /etc/sysctl.conf
# (after backing it up) and applies it.
# Fixes: the tuning block was appended on every run, duplicating it —
# now guarded by its marker comment; `sysctl -p` failed the whole
# script under set -e when any single key is unsupported on the running
# kernel (e.g. bbr congestion control) — now logged as a warning;
# the backup directory is created if it does not exist yet.
# Globals:   CONFIG_BACKUP_DIR (read)
# Arguments: none
#######################################
configure_kernel_parameters() {
    log_info "Configuring kernel parameters for storage performance"

    # Backup current sysctl configuration before touching it.
    local sysctl_backup="${CONFIG_BACKUP_DIR}/sysctl.conf.backup.$(date +%Y%m%d_%H%M%S)"
    mkdir -p "$(dirname "$sysctl_backup")"
    cp /etc/sysctl.conf "$sysctl_backup"

    # Append the tuning block at most once, keyed on its marker comment.
    if ! grep -q 'Added by storage_performance_optimizer.sh' /etc/sysctl.conf; then
        cat >> /etc/sysctl.conf << 'EOF'

# Storage Performance Optimizations
# Added by storage_performance_optimizer.sh

# Virtual Memory settings
vm.dirty_ratio = 5
vm.dirty_background_ratio = 2
vm.dirty_expire_centisecs = 3000
vm.dirty_writeback_centisecs = 500

# Network performance (affects storage over network)
net.core.rmem_max = 134217728
net.core.wmem_max = 134217728
net.ipv4.tcp_rmem = 4096 87380 134217728
net.ipv4.tcp_wmem = 4096 65536 134217728
net.ipv4.tcp_congestion_control = bbr

# File system settings
fs.file-max = 2097152
fs.nr_open = 1048576

# Process limits
kernel.pid_max = 4194304
EOF
    fi

    # Apply settings; individual keys (e.g. bbr when the module is absent)
    # may be rejected — warn instead of aborting under set -e.
    sysctl -p || log_warn "Some sysctl settings could not be applied"

    log_info "Kernel parameters configured for storage performance"
}
|
|
|
|
# Generates and installs a Prometheus storage exporter:
#   1. writes storage_monitor.py (psutil/smartctl based, port 9102),
#   2. installs a systemd unit for it,
#   3. enables and starts the service.
# Requires root (writes under /etc/systemd and calls systemctl).
setup_storage_monitoring() {
    log_info "Setting up storage performance monitoring"

    # Write the Python exporter. Quoted 'EOF' → heredoc content is written
    # verbatim; nothing is expanded by the outer shell.
    cat > "${SCRIPT_DIR}/storage_monitor.py" << 'EOF'
#!/usr/bin/env python3
"""
Storage Performance Monitor
Provides Prometheus metrics for storage I/O performance
"""

import psutil
import time
import json
from http.server import HTTPServer, BaseHTTPRequestHandler
import threading
import logging
import subprocess
import os

class StorageMonitor:
    def __init__(self):
        self.metrics = {}
        self.update_interval = 10

    def get_disk_io_metrics(self):
        """Get disk I/O metrics using psutil"""
        try:
            disk_io = psutil.disk_io_counters(perdisk=True)
            disk_usage = {}

            # Get disk usage for each mount point
            for partition in psutil.disk_partitions():
                try:
                    usage = psutil.disk_usage(partition.mountpoint)
                    disk_usage[partition.device] = {
                        'total': usage.total,
                        'used': usage.used,
                        'free': usage.free,
                        'percent': usage.percent,
                        'mountpoint': partition.mountpoint,
                        'fstype': partition.fstype
                    }
                except PermissionError:
                    continue

            metrics = {
                'disk_io': {},
                'disk_usage': disk_usage,
                'timestamp': time.time()
            }

            for device, io_stats in disk_io.items():
                metrics['disk_io'][device] = {
                    'read_count': io_stats.read_count,
                    'write_count': io_stats.write_count,
                    'read_bytes': io_stats.read_bytes,
                    'write_bytes': io_stats.write_bytes,
                    'read_time': io_stats.read_time,
                    'write_time': io_stats.write_time
                }

            return metrics

        except Exception as e:
            logging.error(f"Error getting disk I/O metrics: {e}")
            return {}

    def get_smart_metrics(self):
        """Get SMART metrics for disk health"""
        smart_metrics = {}

        try:
            # Get list of drives
            result = subprocess.run(['lsblk', '-o', 'NAME,TYPE', '-n'],
                                    capture_output=True, text=True)

            for line in result.stdout.strip().split('\n'):
                parts = line.split()
                if len(parts) >= 2 and parts[1] == 'disk':
                    device = f"/dev/{parts[0]}"

                    try:
                        # Get SMART health status
                        smart_result = subprocess.run(
                            ['smartctl', '-H', device],
                            capture_output=True, text=True
                        )

                        health_status = 1 if 'PASSED' in smart_result.stdout else 0
                        smart_metrics[device] = {'health_status': health_status}

                    except Exception as e:
                        logging.warning(f"Could not get SMART data for {device}: {e}")

        except Exception as e:
            logging.error(f"Error getting SMART metrics: {e}")

        return smart_metrics

    def update_metrics(self):
        """Update all metrics periodically"""
        while True:
            try:
                self.metrics = {
                    'disk_metrics': self.get_disk_io_metrics(),
                    'smart_metrics': self.get_smart_metrics(),
                    'last_update': time.time()
                }
            except Exception as e:
                logging.error(f"Error updating metrics: {e}")

            time.sleep(self.update_interval)

class MetricsHandler(BaseHTTPRequestHandler):
    def __init__(self, storage_monitor, *args, **kwargs):
        self.storage_monitor = storage_monitor
        super().__init__(*args, **kwargs)

    def do_GET(self):
        if self.path == '/metrics':
            self.send_response(200)
            self.send_header('Content-type', 'text/plain')
            self.end_headers()

            metrics_text = self.generate_prometheus_metrics()
            self.wfile.write(metrics_text.encode())
        elif self.path == '/health':
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({"status": "healthy"}).encode())
        else:
            self.send_response(404)
            self.end_headers()

    def generate_prometheus_metrics(self):
        """Generate Prometheus exposition format metrics.

        HELP/TYPE headers are emitted exactly once per metric family;
        the exposition format forbids repeating them per series.
        """
        metrics = []

        try:
            disk_metrics = self.storage_monitor.metrics.get('disk_metrics', {})

            # Disk I/O counter families: (metric name, help text, stat key)
            disk_io = disk_metrics.get('disk_io', {})
            io_families = [
                ('disk_read_bytes_total', 'Total bytes read from disk', 'read_bytes'),
                ('disk_write_bytes_total', 'Total bytes written to disk', 'write_bytes'),
                ('disk_read_operations_total', 'Total read operations', 'read_count'),
                ('disk_write_operations_total', 'Total write operations', 'write_count'),
                ('disk_read_time_ms_total', 'Total time spent reading (ms)', 'read_time'),
                ('disk_write_time_ms_total', 'Total time spent writing (ms)', 'write_time'),
            ]
            if disk_io:
                for name, help_text, key in io_families:
                    metrics.append(f'# HELP {name} {help_text}')
                    metrics.append(f'# TYPE {name} counter')
                    for device, stats in disk_io.items():
                        metrics.append(f'{name}{{device="{device}"}} {stats[key]}')

            # Disk usage gauges
            disk_usage = disk_metrics.get('disk_usage', {})
            if disk_usage:
                metrics.append('# HELP disk_usage_bytes Disk usage in bytes')
                metrics.append('# TYPE disk_usage_bytes gauge')
                for device, usage in disk_usage.items():
                    for kind in ('total', 'used', 'free'):
                        metrics.append(
                            f'disk_usage_bytes{{device="{device}",'
                            f'mountpoint="{usage["mountpoint"]}",'
                            f'fstype="{usage["fstype"]}",'
                            f'type="{kind}"}} {usage[kind]}'
                        )

                metrics.append('# HELP disk_usage_percent Disk usage percentage')
                metrics.append('# TYPE disk_usage_percent gauge')
                for device, usage in disk_usage.items():
                    metrics.append(
                        f'disk_usage_percent{{device="{device}",'
                        f'mountpoint="{usage["mountpoint"]}"}} {usage["percent"]}'
                    )

            # SMART health gauges
            smart_metrics = self.storage_monitor.metrics.get('smart_metrics', {})
            if smart_metrics:
                metrics.append('# HELP disk_smart_health SMART health status (1=healthy, 0=failing)')
                metrics.append('# TYPE disk_smart_health gauge')
                for device, smart_data in smart_metrics.items():
                    metrics.append(f'disk_smart_health{{device="{device}"}} {smart_data["health_status"]}')

        except Exception as e:
            logging.error(f"Error generating metrics: {e}")
            metrics.append(f'# Error generating metrics: {e}')

        return '\n'.join(metrics)

def main():
    logging.basicConfig(level=logging.INFO)

    storage_monitor = StorageMonitor()

    # Start metrics collection in background
    metrics_thread = threading.Thread(target=storage_monitor.update_metrics, daemon=True)
    metrics_thread.start()

    # Create handler with storage_monitor
    def handler(*args, **kwargs):
        return MetricsHandler(storage_monitor, *args, **kwargs)

    # Start HTTP server
    server = HTTPServer(('0.0.0.0', 9102), handler)
    print("Storage metrics server started on port 9102")
    server.serve_forever()

if __name__ == '__main__':
    main()
EOF

    chmod +x "${SCRIPT_DIR}/storage_monitor.py"

    # Install the systemd unit. Unquoted EOF: ${SCRIPT_DIR} is expanded NOW,
    # baking the absolute path into the unit file.
    cat > "/etc/systemd/system/storage-monitor.service" << EOF
[Unit]
Description=Storage Performance Monitor
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=${SCRIPT_DIR}
ExecStart=/usr/bin/python3 ${SCRIPT_DIR}/storage_monitor.py
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF

    # Enable and start the service
    systemctl daemon-reload
    systemctl enable storage-monitor.service
    systemctl start storage-monitor.service

    log_info "Storage monitoring setup completed"
}
|
|
|
|
# Writes two standalone diagnostic scripts into ${SCRIPT_DIR}:
#   storage_benchmark.sh    - fio-based benchmark suite (JSON results)
#   storage_health_check.sh - disk space / SMART / fs-error / I/O report
# Both heredocs use quoted 'EOF' so content is written verbatim and only
# expanded when the generated scripts themselves run.
create_performance_testing_tools() {
    log_info "Creating storage performance testing tools"

    # Comprehensive storage benchmark script
    cat > "${SCRIPT_DIR}/storage_benchmark.sh" << 'EOF'
#!/bin/bash
# Storage Performance Benchmark Tool

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib/error_handling.sh"

BENCHMARK_DIR="/tmp/storage_benchmark_$(date +%Y%m%d_%H%M%S)"
RESULTS_DIR="${SCRIPT_DIR}/../results/benchmarks"

run_storage_benchmarks() {
    log_info "Running comprehensive storage benchmarks"

    # Every test below needs fio and jq; fail early with a clear message
    # instead of dying mid-benchmark under 'set -e'.
    local tool
    for tool in fio jq; do
        if ! command -v "$tool" &>/dev/null; then
            echo "ERROR: required tool not found: $tool" >&2
            exit 1
        fi
    done

    mkdir -p "$BENCHMARK_DIR" "$RESULTS_DIR"
    local results_file="${RESULTS_DIR}/storage_benchmark_$(date +%Y%m%d_%H%M%S).json"

    {
        echo "{"
        echo "  \"benchmark_info\": {"
        echo "    \"timestamp\": \"$(date -Iseconds)\","
        echo "    \"hostname\": \"$(hostname)\","
        echo "    \"benchmark_dir\": \"$BENCHMARK_DIR\""
        echo "  },"

        # Sequential read/write tests
        echo "  \"sequential_tests\": {"
        run_sequential_tests
        echo "  },"

        # Random read/write tests
        echo "  \"random_tests\": {"
        run_random_tests
        echo "  },"

        # Database-like workload tests
        echo "  \"database_tests\": {"
        run_database_tests
        echo "  },"

        # Mixed workload tests
        echo "  \"mixed_tests\": {"
        run_mixed_tests
        echo "  }"

        echo "}"
    } > "$results_file"

    log_info "Benchmark results saved to: $results_file"

    # Cleanup
    rm -rf "$BENCHMARK_DIR"
}

run_sequential_tests() {
    log_info "Running sequential I/O tests"

    # Sequential write test (1M blocks, 1G file)
    local seq_write_result
    seq_write_result=$(fio --name=seq-write --rw=write --bs=1M --size=1G \
        --directory="$BENCHMARK_DIR" --numjobs=1 --time_based=0 \
        --output-format=json 2>/dev/null | jq '.jobs[0].write')

    # Sequential read test
    local seq_read_result
    seq_read_result=$(fio --name=seq-read --rw=read --bs=1M --size=1G \
        --directory="$BENCHMARK_DIR" --numjobs=1 --time_based=0 \
        --output-format=json 2>/dev/null | jq '.jobs[0].read')

    echo "    \"sequential_write\": $seq_write_result,"
    echo "    \"sequential_read\": $seq_read_result"
}

run_random_tests() {
    log_info "Running random I/O tests"

    # Random read test (4K blocks)
    local rand_read_result
    rand_read_result=$(fio --name=rand-read --rw=randread --bs=4K --size=500M \
        --directory="$BENCHMARK_DIR" --numjobs=4 --runtime=60 --time_based=1 \
        --output-format=json 2>/dev/null | jq '.jobs[0].read')

    # Random write test (4K blocks)
    local rand_write_result
    rand_write_result=$(fio --name=rand-write --rw=randwrite --bs=4K --size=500M \
        --directory="$BENCHMARK_DIR" --numjobs=4 --runtime=60 --time_based=1 \
        --output-format=json 2>/dev/null | jq '.jobs[0].write')

    echo "    \"random_read_4k\": $rand_read_result,"
    echo "    \"random_write_4k\": $rand_write_result"
}

run_database_tests() {
    log_info "Running database-like workload tests"

    # Database-like mixed workload (70% read, 30% write, 8K blocks)
    local db_mixed_result
    db_mixed_result=$(fio --name=db-mixed --rw=randrw --rwmixread=70 --bs=8K \
        --size=500M --directory="$BENCHMARK_DIR" --numjobs=8 --runtime=60 \
        --time_based=1 --output-format=json 2>/dev/null | jq '.jobs[0]')

    echo "    \"database_mixed_workload\": $db_mixed_result"
}

run_mixed_tests() {
    log_info "Running mixed workload tests"

    # Simulate container I/O patterns (60% read, 64K blocks)
    local container_result
    container_result=$(fio --name=container-io --rw=randrw --rwmixread=60 \
        --bs=64K --size=500M --directory="$BENCHMARK_DIR" --numjobs=2 \
        --runtime=60 --time_based=1 --output-format=json 2>/dev/null | jq '.jobs[0]')

    echo "    \"container_workload\": $container_result"
}

# Run benchmarks if called directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    run_storage_benchmarks
fi
EOF

    chmod +x "${SCRIPT_DIR}/storage_benchmark.sh"

    # Storage health check script
    cat > "${SCRIPT_DIR}/storage_health_check.sh" << 'EOF'
#!/bin/bash
# Storage Health Check Tool

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib/error_handling.sh"

check_storage_health() {
    log_info "Performing storage health check"

    local health_report="${SCRIPT_DIR}/../reports/storage_health_$(date +%Y%m%d_%H%M%S).txt"
    mkdir -p "$(dirname "$health_report")"

    {
        echo "Storage Health Check Report"
        echo "Generated: $(date)"
        echo "=========================="
        echo

        check_disk_space
        check_smart_status
        check_filesystem_errors
        check_io_performance
        check_docker_volumes

    } > "$health_report"

    log_info "Health check report saved to: $health_report"
}

check_disk_space() {
    echo "=== Disk Space Check ==="
    df -h | grep -E "(Filesystem|/dev/)"
    echo

    # Check for critical space usage (>90%)
    local critical_mounts
    critical_mounts=$(df -h | awk 'NR>1 {gsub("%","",$5); if($5 > 90) print $6 " (" $5 "%)"}')

    if [[ -n "$critical_mounts" ]]; then
        echo "WARNING: Critical disk space usage detected:"
        echo "$critical_mounts"
    else
        echo "Disk space usage: OK"
    fi
    echo
}

check_smart_status() {
    echo "=== SMART Health Check ==="

    # Enumerate whole disks via lsblk instead of globbing /dev/sd* and
    # filtering on a trailing digit: that filter wrongly excluded every
    # NVMe namespace (/dev/nvme0n1 ends in a digit).
    local name drive
    while IFS= read -r name; do
        drive="/dev/${name}"
        [[ -b "$drive" ]] || continue
        echo "Drive: $drive"
        if smartctl -H "$drive" 2>/dev/null | grep -q "PASSED"; then
            echo "  Status: HEALTHY"
        else
            echo "  Status: WARNING - Check SMART details"
        fi
    done < <(lsblk -dn -o NAME,TYPE | awk '$2 == "disk" {print $1}')
    echo
}

check_filesystem_errors() {
    echo "=== Filesystem Error Check ==="

    # '|| true' is required: with no matches grep exits 1, which under
    # 'set -euo pipefail' would abort the health check in the HEALTHY case.
    local fs_errors
    fs_errors=$(dmesg | grep -i "error\|fail\|corrupt" | grep -E "(ext4|xfs|btrfs)" | tail -5 || true)

    if [[ -n "$fs_errors" ]]; then
        echo "Recent filesystem errors found:"
        echo "$fs_errors"
    else
        echo "No recent filesystem errors found"
    fi
    echo
}

check_io_performance() {
    echo "=== I/O Performance Check ==="

    # Quick I/O test. NOTE(review): the read pass likely measures the page
    # cache since the file was just written — treat as a rough indicator.
    local test_file="/tmp/io_test_$$"
    local write_speed read_speed

    # dd may report KB/s, MB/s or GB/s depending on throughput; match any
    # unit, and '|| true' keeps a non-matching grep from killing the
    # script under 'set -e'.
    write_speed=$(dd if=/dev/zero of="$test_file" bs=1M count=100 2>&1 |
                  grep -o '[0-9.,]\+ [KMGT]\?B/s' | tail -1 || true)
    read_speed=$(dd if="$test_file" of=/dev/null bs=1M 2>&1 |
                 grep -o '[0-9.,]\+ [KMGT]\?B/s' | tail -1 || true)

    echo "Sequential write speed: ${write_speed:-Unknown}"
    echo "Sequential read speed: ${read_speed:-Unknown}"

    rm -f "$test_file"
    echo
}

check_docker_volumes() {
    echo "=== Docker Volume Health ==="

    if command -v docker &>/dev/null; then
        echo "Docker volumes:"
        docker volume ls --format "table {{.Name}}\t{{.Driver}}\t{{.Scope}}"
        echo

        # Check for dangling volumes
        local dangling_volumes
        dangling_volumes=$(docker volume ls -qf dangling=true)

        if [[ -n "$dangling_volumes" ]]; then
            echo "Dangling volumes found: $dangling_volumes"
        else
            echo "No dangling volumes found"
        fi
    else
        echo "Docker not available"
    fi
    echo
}

# Run health check if called directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    check_storage_health
fi
EOF

    chmod +x "${SCRIPT_DIR}/storage_health_check.sh"

    log_info "Storage performance testing tools created"
}
|
|
|
|
# Generates create_optimized_volumes.sh (idempotent Docker volume setup for
# databases, media, cache and backups) and runs it immediately.
optimize_docker_volumes() {
    log_info "Optimizing Docker volume configurations"

    # Create optimized volume creation script (quoted 'EOF': written verbatim)
    cat > "${SCRIPT_DIR}/create_optimized_volumes.sh" << 'EOF'
#!/bin/bash
# Create Optimized Docker Volumes

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
source "${SCRIPT_DIR}/lib/error_handling.sh"

# create_bind_volume NAME PATH [OWNER] [MODE]
# Shared helper for all bind-mounted volumes. Prepares the backing
# directory FIRST so the bind source exists before the volume can ever be
# mounted, then creates the local volume. OWNER may be empty to skip chown.
create_bind_volume() {
    local name=$1 path=$2 owner=${3:-} mode=${4:-755}

    mkdir -p "$path"
    if [[ -n "$owner" ]]; then
        chown "$owner" "$path"
    fi
    chmod "$mode" "$path"

    docker volume create "$name" \
        --driver local \
        --opt type=none \
        --opt o=bind \
        --opt device="$path"
}

create_optimized_volumes() {
    log_info "Creating optimized Docker volumes"

    # Database volumes with specific optimizations
    create_database_volumes

    # Media volumes with large file optimizations
    create_media_volumes

    # Cache volumes with performance optimizations
    create_cache_volumes

    # Backup volumes with compression support
    create_backup_volumes
}

create_database_volumes() {
    log_info "Creating optimized database volumes"

    # PostgreSQL data volume (official image runs as uid/gid 999; data dir
    # must be private, hence mode 700)
    if ! docker volume inspect postgres_data &>/dev/null; then
        create_bind_volume postgres_data /opt/homelab/postgres/data 999:999 700
        log_info "Created PostgreSQL data volume with optimized permissions"
    fi

    # Redis data volume
    if ! docker volume inspect redis_data &>/dev/null; then
        create_bind_volume redis_data /opt/homelab/redis/data 999:999 755
        log_info "Created Redis data volume"
    fi

    # InfluxDB data volume
    if ! docker volume inspect influxdb_data &>/dev/null; then
        create_bind_volume influxdb_data /opt/homelab/influxdb/data 1000:1000 755
        log_info "Created InfluxDB data volume"
    fi
}

create_media_volumes() {
    log_info "Creating optimized media volumes"

    # Jellyfin configuration volume
    if ! docker volume inspect jellyfin_config &>/dev/null; then
        create_bind_volume jellyfin_config /opt/homelab/jellyfin/config 1000:1000 755
    fi

    # Immich upload volume
    if ! docker volume inspect immich_uploads &>/dev/null; then
        create_bind_volume immich_uploads /opt/homelab/immich/uploads 1000:1000 755
    fi
}

create_cache_volumes() {
    log_info "Creating optimized cache volumes"

    if ! docker volume inspect app_cache &>/dev/null; then
        # Use tmpfs for high-performance caching if enough RAM available
        local total_memory_kb
        total_memory_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
        local total_memory_gb=$((total_memory_kb / 1024 / 1024))

        if [[ $total_memory_gb -gt 8 ]]; then
            # tmpfs-backed cache on systems with > 8GB RAM
            docker volume create app_cache \
                --driver local \
                --opt type=tmpfs \
                --opt device=tmpfs \
                --opt o=size=1G,uid=1000,gid=1000

            log_info "Created tmpfs cache volume (1GB)"
        else
            # Regular bind mount for caching (no ownership change needed)
            create_bind_volume app_cache /opt/homelab/cache "" 755
            log_info "Created filesystem cache volume"
        fi
    fi
}

create_backup_volumes() {
    log_info "Creating optimized backup volumes"

    # Backup volume with compression support
    if ! docker volume inspect backup_data &>/dev/null; then
        create_bind_volume backup_data /opt/homelab/backups "" 755
        log_info "Created backup data volume"
    fi
}

# Run volume creation if called directly
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
    create_optimized_volumes
fi
EOF

    chmod +x "${SCRIPT_DIR}/create_optimized_volumes.sh"

    # Run the volume optimization
    "${SCRIPT_DIR}/create_optimized_volumes.sh"

    log_info "Docker volume optimization completed"
}
|
|
|
|
# Exit handler registered via register_cleanup: best-effort removal of
# scratch files left behind by the benchmark and I/O-test tools.
cleanup_on_exit() {
    log_info "Cleaning up storage optimization resources"

    # Failures here (nothing matched the glob, permissions) must never
    # abort the exit path, so each removal is individually suppressed.
    local scratch
    for scratch in /tmp/storage_benchmark_* /tmp/io_test_*; do
        rm -rf "$scratch" 2>/dev/null || true
    done

    log_info "Storage optimization cleanup completed"
}
|
|
|
|
# Entry point: run the full optimization pipeline, forwarding all CLI args.
main "$@"