# COMPREHENSIVE CHANGES:
#
# INFRASTRUCTURE MIGRATION:
# - Migrated services to Docker Swarm on OMV800 (192.168.50.229)
# - Deployed PostgreSQL database for Vaultwarden migration
# - Updated all stack configurations for Docker Swarm compatibility
# - Added comprehensive monitoring stack (Prometheus, Grafana, Blackbox)
# - Implemented proper secret management for all services
#
# VAULTWARDEN POSTGRESQL MIGRATION:
# - Attempted migration from SQLite to PostgreSQL for NFS compatibility
# - Created PostgreSQL stack with proper user/password configuration
# - Built custom Vaultwarden image with PostgreSQL support
# - Troubleshot persistent SQLite fallback issue despite PostgreSQL config
# - Identified known issue where Vaultwarden silently falls back to SQLite
# - Added ENABLE_DB_WAL=false to prevent filesystem compatibility issues
# - Current status: Old Vaultwarden on lenovo410 still working, new one has config issues
#
# PAPERLESS SERVICES:
# - Successfully deployed Paperless-NGX and Paperless-AI on OMV800
# - Both services running on ports 8000 and 3000 respectively
# - Caddy configuration updated for external access
# - Services accessible via paperless.pressmess.duckdns.org and paperless-ai.pressmess.duckdns.org
#
# CADDY CONFIGURATION:
# - Updated Caddyfile on Surface (192.168.50.254) for new service locations
# - Fixed Vaultwarden reverse proxy to point to new Docker Swarm service
# - Removed old notification hub reference that was causing conflicts
# - All services properly configured for external access via DuckDNS
#
# BACKUP AND DISCOVERY:
# - Created comprehensive backup system for all hosts
# - Generated detailed discovery reports for infrastructure analysis
# - Implemented automated backup validation scripts
# - Created migration progress tracking and verification reports
#
# MONITORING STACK:
# - Deployed Prometheus, Grafana, and Blackbox monitoring
# - Created infrastructure and system overview dashboards
# - Added proper service discovery and alerting configuration
# - Implemented performance monitoring for all critical services
# DOCUMENTATION:
# - Reorganized documentation into logical structure
# - Created comprehensive migration playbook and troubleshooting guides
# - Added hardware specifications and optimization recommendations
# - Documented all configuration changes and service dependencies
#
# CURRENT STATUS:
# - Paperless services: ✅ Working and accessible externally
# - Vaultwarden: ❌ PostgreSQL configuration issues, old instance still working
# - Monitoring: ✅ Deployed and operational
# - Caddy: ✅ Updated and working for external access
# - PostgreSQL: ✅ Database running, connection issues with Vaultwarden
#
# NEXT STEPS:
# - Continue troubleshooting Vaultwarden PostgreSQL configuration
# - Consider alternative approaches for Vaultwarden migration
# - Validate all external service access
# - Complete final migration validation
#
# TECHNICAL NOTES:
# - Used Docker Swarm for orchestration on OMV800
# - Implemented proper secret management for sensitive data
# - Added comprehensive logging and monitoring
# - Created automated backup and validation scripts
# Original file as listed in the repository viewer: 96 lines, 2.5 KiB, YAML
---
# Monitoring stack for Docker Swarm: Prometheus (metrics) + Grafana (dashboards).
#
# Everything marked `external: true` below must exist BEFORE
# `docker stack deploy` is run; the stack does not create it:
#   - overlay networks `monitoring-network` and `caddy-public`
#   - Docker secret `grafana_admin_password`, e.g.
#       printf '%s' '<strong password>' | docker secret create grafana_admin_password -
version: '3.9'

services:
  prometheus:
    image: prom/prometheus:v2.47.0
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --web.console.libraries=/etc/prometheus/console_libraries
      - --web.console.templates=/etc/prometheus/consoles
      - --storage.tsdb.retention.time=30d
      # NOTE(review): the two flags below expose unauthenticated HTTP endpoints
      # for reload/quit and for TSDB delete/snapshot. Because port 9090 is
      # published on the host and the service is attached to `caddy-public`,
      # anyone who can reach either can call them. Confirm both are needed.
      - --web.enable-lifecycle
      - --web.enable-admin-api
    volumes:
      - prometheus_data:/prometheus
      # Host bind mount: the file must be present on whichever node runs the task.
      - /opt/configs/monitoring/prometheus-simple.yml:/etc/prometheus/prometheus.yml:ro
    networks:
      - monitoring-network
      - caddy-public
    ports:
      # Quoted so no YAML parser can reinterpret the HOST:CONTAINER pair.
      - '9090:9090'
    healthcheck:
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:9090/-/healthy
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
      # Placed under `deploy` because a Swarm-mode Traefik reads service-level
      # labels. NOTE(review): the original nesting was lost; confirm placement
      # and whether a Traefik instance actually consumes these labels.
      labels:
        - traefik.enable=true
        - 'traefik.http.routers.prometheus.rule=Host(`prometheus.pressmess.duckdns.org`)'
        - traefik.http.routers.prometheus.entrypoints=websecure
        - traefik.http.routers.prometheus.tls=true
        - traefik.http.services.prometheus.loadbalancer.server.port=9090

  grafana:
    image: grafana/grafana:10.1.2
    environment:
      GF_PROVISIONING_PATH: /etc/grafana/provisioning
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel
      GF_FEATURE_TOGGLES_ENABLE: publicDashboards
      # The admin password is read from a Docker secret instead of being
      # committed in plain text (was the literal `admin123`). Grafana resolves
      # any `GF_*__FILE` variable by reading the referenced file.
      # This value is only applied when Grafana first initialises its database;
      # an instance that already started with the old password must be rotated
      # separately (e.g. `grafana-cli admin reset-admin-password`).
      GF_SECURITY_ADMIN_PASSWORD__FILE: /run/secrets/grafana_admin_password
    secrets:
      - grafana_admin_password
    volumes:
      - grafana_data:/var/lib/grafana
    networks:
      - monitoring-network
      - caddy-public
    healthcheck:
      test:
        - CMD-SHELL
        - curl -f http://localhost:3000/api/health || exit 1
      interval: 30s
      timeout: 10s
      retries: 3
      # Longer than Prometheus: plugin installation runs at startup.
      start_period: 60s
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
      # See the note on the prometheus service regarding label placement.
      labels:
        - traefik.enable=true
        - 'traefik.http.routers.grafana.rule=Host(`grafana.pressmess.duckdns.org`)'
        - traefik.http.routers.grafana.entrypoints=websecure
        - traefik.http.routers.grafana.tls=true
        - traefik.http.services.grafana.loadbalancer.server.port=3000

volumes:
  prometheus_data:
    driver: local
  # NOTE(review): declared but not mounted by any service in this file
  # (Prometheus config comes from the host bind mount). Kept for compatibility;
  # remove if nothing else depends on it.
  prometheus_config:
    driver: local
  grafana_data:
    driver: local

networks:
  monitoring-network:
    external: true
  caddy-public:
    external: true

secrets:
  grafana_admin_password:
    external: true