Files
HomeAudit/migration_scripts/scripts/validate_nfs_performance.sh
admin 705a2757c1 Major infrastructure migration and Vaultwarden PostgreSQL troubleshooting
COMPREHENSIVE CHANGES:

INFRASTRUCTURE MIGRATION:
- Migrated services to Docker Swarm on OMV800 (192.168.50.229)
- Deployed PostgreSQL database for Vaultwarden migration
- Updated all stack configurations for Docker Swarm compatibility
- Added comprehensive monitoring stack (Prometheus, Grafana, Blackbox)
- Implemented proper secret management for all services

VAULTWARDEN POSTGRESQL MIGRATION:
- Attempted migration from SQLite to PostgreSQL for NFS compatibility
- Created PostgreSQL stack with proper user/password configuration
- Built custom Vaultwarden image with PostgreSQL support
- Troubleshot persistent SQLite fallback issue despite PostgreSQL config
- Identified known issue where Vaultwarden silently falls back to SQLite
- Added ENABLE_DB_WAL=false to prevent filesystem compatibility issues
- Current status: Old Vaultwarden on lenovo410 still working, new one has config issues

PAPERLESS SERVICES:
- Successfully deployed Paperless-NGX and Paperless-AI on OMV800
- Both services running on ports 8000 and 3000 respectively
- Caddy configuration updated for external access
- Services accessible via paperless.pressmess.duckdns.org and paperless-ai.pressmess.duckdns.org

CADDY CONFIGURATION:
- Updated Caddyfile on Surface (192.168.50.254) for new service locations
- Fixed Vaultwarden reverse proxy to point to new Docker Swarm service
- Removed old notification hub reference that was causing conflicts
- All services properly configured for external access via DuckDNS

BACKUP AND DISCOVERY:
- Created comprehensive backup system for all hosts
- Generated detailed discovery reports for infrastructure analysis
- Implemented automated backup validation scripts
- Created migration progress tracking and verification reports

MONITORING STACK:
- Deployed Prometheus, Grafana, and Blackbox monitoring
- Created infrastructure and system overview dashboards
- Added proper service discovery and alerting configuration
- Implemented performance monitoring for all critical services

DOCUMENTATION:
- Reorganized documentation into logical structure
- Created comprehensive migration playbook and troubleshooting guides
- Added hardware specifications and optimization recommendations
- Documented all configuration changes and service dependencies

CURRENT STATUS:
- Paperless services: Working and accessible externally
- Vaultwarden: PostgreSQL configuration issues, old instance still working
- Monitoring: Deployed and operational
- Caddy: Updated and working for external access
- PostgreSQL: Database running, connection issues with Vaultwarden

NEXT STEPS:
- Continue troubleshooting Vaultwarden PostgreSQL configuration
- Consider alternative approaches for Vaultwarden migration
- Validate all external service access
- Complete final migration validation

TECHNICAL NOTES:
- Used Docker Swarm for orchestration on OMV800
- Implemented proper secret management for sensitive data
- Added comprehensive logging and monitoring
- Created automated backup and validation scripts
2025-08-30 20:18:44 -04:00

344 lines
10 KiB
Bash
Executable File

#!/bin/bash
# NFS Performance Validation Script
# Validates NFS performance and connectivity across the infrastructure
set -euo pipefail

# ANSI color codes for terminal output; readonly because they are constants.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color
# Emit an informational message to stdout with a green [INFO] tag.
# %b interprets the backslash escapes in the color codes, like `echo -e`.
print_status() {
  printf '%b\n' "${GREEN}[INFO]${NC} $1"
}
# Emit a warning message to stdout with a yellow [WARNING] tag.
print_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
}
# Emit an error message with a red [ERROR] tag.
# Note: the original writes to stdout, not stderr; behavior preserved.
print_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1"
}
# Emit a section header to stdout with a blue [HEADER] tag.
print_header() {
  printf '%b\n' "${BLUE}[HEADER]${NC} $1"
}
# Configuration constants (readonly: nothing below reassigns them)
readonly NFS_SERVER="omv800.local"   # NFS server to validate against
readonly NFS_EXPORT="/export"        # export path on the server
readonly TEST_DIR="/mnt/nfs_test"    # local mount point used for all tests
# NOTE(review): TEST_FILE_SIZE is currently unreferenced — the dd calls below
# hardcode bs=1M count=100. Confirm whether tests should honor it.
readonly TEST_FILE_SIZE="100M"
readonly TEST_ITERATIONS=5           # iterations per read/write benchmark
# NOTE(review): HOSTS is never referenced in this script — kept for
# backward compatibility; confirm whether per-host testing was intended.
HOSTS=("omv800.local" "jonathan-2518f5u" "surface" "fedora" "audrey")
# Report destination; optional first positional argument overrides the default.
readonly OUTPUT_FILE="${1:-/tmp/nfs_performance_report.txt}"
# Verify the NFS server answers ping and publishes an export list.
# Globals:  NFS_SERVER (read)
# Outputs:  the export list to stdout on success
# Returns:  0 when reachable and exports are listable, 1 otherwise
check_nfs_server() {
    print_header "Checking NFS Server Accessibility"
    if ! ping -c 1 "$NFS_SERVER" >/dev/null 2>&1; then
        print_error "NFS server $NFS_SERVER is not reachable"
        return 1
    fi
    # Capture the export list once; the original queried showmount twice
    # (once to probe, once to display), doubling the RPC round trips.
    local exports
    if ! exports=$(showmount -e "$NFS_SERVER" 2>/dev/null); then
        print_error "Cannot get exports from $NFS_SERVER"
        return 1
    fi
    print_status "NFS server $NFS_SERVER is accessible"
    printf '%s\n' "$exports"
}
# Mount $NFS_SERVER:$NFS_EXPORT at $TEST_DIR, replacing any existing mount.
# Globals:  NFS_SERVER, NFS_EXPORT, TEST_DIR (read)
# Returns:  0 on successful mount, 1 on failure
mount_nfs_export() {
    print_header "Mounting NFS Export"
    # Ensure the mount point exists.
    sudo mkdir -p "$TEST_DIR"
    # Start from a clean slate if something is already mounted there.
    if mountpoint -q "$TEST_DIR"; then
        print_status "Unmounting existing mount at $TEST_DIR"
        sudo umount "$TEST_DIR"
    fi
    # Guard-clause form: bail out on mount failure, report success otherwise.
    if ! sudo mount -t nfs "$NFS_SERVER:$NFS_EXPORT" "$TEST_DIR"; then
        print_error "Failed to mount $NFS_SERVER:$NFS_EXPORT"
        return 1
    fi
    print_status "Successfully mounted $NFS_SERVER:$NFS_EXPORT to $TEST_DIR"
    return 0
}
# Measure sequential write throughput onto the NFS mount.
# Globals:  TEST_DIR, TEST_ITERATIONS, OUTPUT_FILE (read)
# Outputs:  progress to stdout; appends "Write Performance: ..." to the report
test_write_performance() {
    print_header "Testing Write Performance"
    local test_file="$TEST_DIR/write_test_$(date +%s)"
    local total_time=0
    local total_size=0
    local successes=0
    local i start_time end_time duration size
    for i in $(seq 1 "$TEST_ITERATIONS"); do
        print_status "Write test iteration $i/$TEST_ITERATIONS"
        start_time=$(date +%s.%N)
        if dd if=/dev/zero of="$test_file.$i" bs=1M count=100 2>/dev/null; then
            end_time=$(date +%s.%N)
            duration=$(echo "$end_time - $start_time" | bc -l)
            size=$(stat -c%s "$test_file.$i" 2>/dev/null || echo "0")
            total_time=$(echo "$total_time + $duration" | bc -l)
            total_size=$(echo "$total_size + $size" | bc -l)
            successes=$((successes + 1))
            print_status "Iteration $i: ${size} bytes in ${duration}s"
        else
            print_error "Write test iteration $i failed"
        fi
    done
    # Delete the files we created here: the EXIT-trap cleanup glob ("test_*")
    # does not match "write_test_*", so these would otherwise be left behind.
    rm -f "$test_file".* 2>/dev/null || true
    # Guard the bc division: with zero successes the original divided by zero.
    if [ "$successes" -eq 0 ]; then
        print_error "All write iterations failed; skipping throughput calculation"
        echo "Write Performance: FAILED" >> "$OUTPUT_FILE"
        return 0
    fi
    # Average over iterations that succeeded (the original divided by the
    # attempt count, deflating the figure whenever an iteration failed).
    local avg_time avg_size write_speed
    avg_time=$(echo "$total_time / $successes" | bc -l)
    avg_size=$(echo "$total_size / $successes" | bc -l)
    write_speed=$(echo "$avg_size / $avg_time / 1024 / 1024" | bc -l)
    echo "Write Performance: ${write_speed} MB/s average" >> "$OUTPUT_FILE"
    print_status "Write Performance: ${write_speed} MB/s average"
}
# Measure sequential read throughput from the NFS mount.
# Globals:  TEST_DIR, TEST_ITERATIONS, OUTPUT_FILE (read)
# Outputs:  progress to stdout; appends "Read Performance: ..." to the report
# NOTE(review): the file is written immediately before being read, so repeated
# reads may be served from the client page cache rather than the wire —
# confirm whether cache dropping (or iflag=direct) is wanted here.
test_read_performance() {
    print_header "Testing Read Performance"
    local test_file="$TEST_DIR/read_test_$(date +%s)"
    local total_time=0
    local total_size=0
    local successes=0
    # Create the 100 MiB source file the read iterations will consume.
    dd if=/dev/zero of="$test_file" bs=1M count=100 2>/dev/null
    local i start_time end_time duration size
    for i in $(seq 1 "$TEST_ITERATIONS"); do
        print_status "Read test iteration $i/$TEST_ITERATIONS"
        start_time=$(date +%s.%N)
        if dd if="$test_file" of=/dev/null bs=1M 2>/dev/null; then
            end_time=$(date +%s.%N)
            duration=$(echo "$end_time - $start_time" | bc -l)
            size=$(stat -c%s "$test_file" 2>/dev/null || echo "0")
            total_time=$(echo "$total_time + $duration" | bc -l)
            total_size=$(echo "$total_size + $size" | bc -l)
            successes=$((successes + 1))
            print_status "Iteration $i: ${size} bytes in ${duration}s"
        else
            print_error "Read test iteration $i failed"
        fi
    done
    # Cleanup test file
    rm -f "$test_file"
    # Guard the bc division: with zero successes the original divided by zero.
    if [ "$successes" -eq 0 ]; then
        print_error "All read iterations failed; skipping throughput calculation"
        echo "Read Performance: FAILED" >> "$OUTPUT_FILE"
        return 0
    fi
    # Average over successful iterations only (see test_write_performance).
    local avg_time avg_size read_speed
    avg_time=$(echo "$total_time / $successes" | bc -l)
    avg_size=$(echo "$total_size / $successes" | bc -l)
    read_speed=$(echo "$avg_size / $avg_time / 1024 / 1024" | bc -l)
    echo "Read Performance: ${read_speed} MB/s average" >> "$OUTPUT_FILE"
    print_status "Read Performance: ${read_speed} MB/s average"
}
# Spawn parallel readers that hammer one shared file to verify the NFS export
# tolerates concurrent access.
# Globals:  TEST_DIR, OUTPUT_FILE (read)
# NOTE(review): the result is written as "PASSED" unconditionally — dd
# failures inside the workers are silent. Confirm whether worker failures
# should be detected and reported instead.
test_concurrent_access() {
print_header "Testing Concurrent Access"
local test_file="$TEST_DIR/concurrent_test"
local num_processes=10
local test_duration=30
# Create the 10 MiB file shared by all workers
dd if=/dev/zero of="$test_file" bs=1M count=10 2>/dev/null
print_status "Starting $num_processes concurrent processes for ${test_duration}s"
local start_time=$(date +%s)
# Start concurrent workers: each backgrounded subshell inherits start_time
# and repeatedly re-reads the shared file until the deadline passes.
for i in $(seq 1 $num_processes); do
(
while [ $(($(date +%s) - start_time)) -lt $test_duration ]; do
dd if="$test_file" of=/dev/null bs=1M count=1 2>/dev/null
sleep 0.1
done
) &
done
# Barrier: wait for every background worker before measuring elapsed time
wait
local end_time=$(date +%s)
local total_time=$((end_time - start_time))
echo "Concurrent Access: $num_processes processes for ${total_time}s - PASSED" >> "$OUTPUT_FILE"
print_status "Concurrent access test completed successfully"
# Remove the shared test file
rm -f "$test_file"
}
# Measure average ICMP round-trip time to the NFS server.
# Globals:  NFS_SERVER, OUTPUT_FILE (read)
# Outputs:  appends "Network Latency: ..." to the report file
test_network_latency() {
    print_header "Testing Network Latency"
    local total_latency=0
    local ping_count=10
    local successes=0
    local i latency
    for i in $(seq 1 "$ping_count"); do
        # Extract the value from the "time=X ms" field of ping's reply line.
        latency=$(ping -c 1 "$NFS_SERVER" 2>/dev/null | grep "time=" | cut -d'=' -f4 | cut -d' ' -f1)
        if [ -n "$latency" ]; then
            total_latency=$(echo "$total_latency + $latency" | bc -l)
            successes=$((successes + 1))
        fi
    done
    # Average only over pings that got a reply; the original divided by the
    # attempt count, understating latency whenever packets were lost, and
    # divided by zero-sum when every ping failed.
    if [ "$successes" -eq 0 ]; then
        print_error "No ping replies from $NFS_SERVER; cannot measure latency"
        echo "Network Latency: UNAVAILABLE" >> "$OUTPUT_FILE"
        return 0
    fi
    local avg_latency
    avg_latency=$(echo "$total_latency / $successes" | bc -l)
    echo "Network Latency: ${avg_latency}ms average" >> "$OUTPUT_FILE"
    print_status "Network Latency: ${avg_latency}ms average"
}
# Inspect the active NFS mount and warn about absent performance options
# (rsize, wsize, noatime).
# Globals:  TEST_DIR, OUTPUT_FILE (read)
check_mount_options() {
    print_header "Checking NFS Mount Options"
    # Guard clause: nothing to inspect if the test mount is not active.
    if ! mountpoint -q "$TEST_DIR"; then
        print_error "NFS not mounted at $TEST_DIR"
        return
    fi
    local mount_info
    mount_info=$(mount | grep "$TEST_DIR" || true)
    echo "Mount Options: $mount_info" >> "$OUTPUT_FILE"
    print_status "Current mount: $mount_info"
    # Pattern-match directly with [[ ]] instead of spawning echo | grep.
    if [[ "$mount_info" == *"rsize="* ]]; then
        print_status "Read buffer size configured"
    else
        print_warning "Read buffer size not configured (consider rsize=32768)"
    fi
    if [[ "$mount_info" == *"wsize="* ]]; then
        print_status "Write buffer size configured"
    else
        print_warning "Write buffer size not configured (consider wsize=32768)"
    fi
    if [[ "$mount_info" == *noatime* ]]; then
        print_status "No access time updates configured"
    else
        print_warning "Access time updates enabled (consider noatime for performance)"
    fi
}
# Write the report header plus system/network details, truncating any
# previous report at $OUTPUT_FILE. Test functions append results afterwards.
# Globals:  NFS_SERVER, NFS_EXPORT, TEST_DIR, OUTPUT_FILE (read)
generate_report() {
    print_header "Generating Performance Report"
    # Group the writes so the file is opened (and truncated) exactly once.
    {
        echo "=== NFS Performance Validation Report ==="
        echo "Date: $(date)"
        echo "NFS Server: $NFS_SERVER"
        echo "NFS Export: $NFS_EXPORT"
        echo "Test Directory: $TEST_DIR"
        echo ""
        echo "=== System Information ==="
        echo "Hostname: $(hostname)"
        echo "Kernel: $(uname -r)"
        # NOTE(review): nfsstat's first output line is a section heading, not a
        # version string — confirm the label. Guarded so a failing nfsstat
        # cannot abort the script under set -e -o pipefail.
        echo "NFS Client Version: $(nfsstat -c 2>/dev/null | head -1 || echo "unavailable")"
        echo ""
        echo "=== Network Information ==="
        ip route get "$NFS_SERVER" 2>/dev/null || echo "Cannot determine route to $NFS_SERVER"
        echo ""
    } > "$OUTPUT_FILE"
    print_status "Performance report saved to $OUTPUT_FILE"
}
# EXIT-trap handler: remove test artifacts, unmount, and drop the mount point.
# Globals:  TEST_DIR (read)
cleanup() {
    print_header "Cleaning Up"
    # Remove every test artifact while the export is still mounted. The
    # original glob "test_*" matched none of the files the tests create
    # (write_test_*, read_test_*, concurrent_test), leaving them behind on
    # the NFS export.
    rm -f "$TEST_DIR"/*test* 2>/dev/null || true
    # Unmount NFS if the test mount is still active
    if mountpoint -q "$TEST_DIR"; then
        sudo umount "$TEST_DIR"
        print_status "Unmounted $TEST_DIR"
    fi
    # Remove the (now empty) mount point; ignore failure if it never existed
    sudo rmdir "$TEST_DIR" 2>/dev/null || true
}
# Print command-line usage information to stdout.
usage() {
    cat <<EOF
Usage: $0 [output_file]
 output_file: Path to save performance report (default: /tmp/nfs_performance_report.txt)

This script validates NFS performance and connectivity.
It performs write/read tests, concurrent access tests, and network latency tests.
EOF
}
# Main execution: privilege check, dependency check, report initialization,
# then the full test suite against the mounted export.
# Globals:  EUID, OUTPUT_FILE (read)
# Exits non-zero if prerequisites are missing or the NFS mount fails.
main() {
print_header "Starting NFS Performance Validation"
# Check if running as root (needed for mounting)
# NOTE(review): mount/umount calls elsewhere already use sudo, so this hard
# root requirement may be stricter than necessary — confirm intent.
if [ "$EUID" -ne 0 ]; then
print_error "This script must be run as root (needed for NFS mounting)"
exit 1
fi
# Check dependencies: every external tool the test functions rely on
for cmd in ping showmount mount dd bc nfsstat; do
if ! command -v "$cmd" >/dev/null 2>&1; then
print_error "Required command '$cmd' not found"
exit 1
fi
done
# Initialize report (truncates $OUTPUT_FILE; tests append their results)
generate_report
# Run tests only if the server answers and the export mounts cleanly
if check_nfs_server && mount_nfs_export; then
test_network_latency
check_mount_options
test_write_performance
test_read_performance
test_concurrent_access
print_status "All NFS performance tests completed successfully"
print_status "Report saved to: $OUTPUT_FILE"
else
print_error "NFS validation failed - cannot proceed with performance tests"
exit 1
fi
}
# Ensure mounts and test artifacts are removed on every exit path.
trap cleanup EXIT

# Parse command line arguments.
# Use "${1:-}" rather than "$1": under "set -u" an unguarded $1 aborts the
# script with an unbound-variable error whenever it is run with no arguments.
if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
    usage
    exit 0
fi

# Run main function
main "$@"