COMPREHENSIVE CHANGES:

INFRASTRUCTURE MIGRATION:
- Migrated services to Docker Swarm on OMV800 (192.168.50.229)
- Deployed PostgreSQL database for Vaultwarden migration
- Updated all stack configurations for Docker Swarm compatibility
- Added comprehensive monitoring stack (Prometheus, Grafana, Blackbox)
- Implemented proper secret management for all services

VAULTWARDEN POSTGRESQL MIGRATION:
- Attempted migration from SQLite to PostgreSQL for NFS compatibility
- Created PostgreSQL stack with proper user/password configuration
- Built custom Vaultwarden image with PostgreSQL support
- Troubleshot persistent SQLite fallback issue despite PostgreSQL config
- Identified known issue where Vaultwarden silently falls back to SQLite
- Added ENABLE_DB_WAL=false to prevent filesystem compatibility issues
- Current status: old Vaultwarden instance on lenovo410 is still working; the new one has configuration issues

PAPERLESS SERVICES:
- Successfully deployed Paperless-NGX and Paperless-AI on OMV800
- Services are running on ports 8000 and 3000, respectively
- Caddy configuration updated for external access
- Services accessible via paperless.pressmess.duckdns.org and paperless-ai.pressmess.duckdns.org

CADDY CONFIGURATION:
- Updated Caddyfile on Surface (192.168.50.254) for new service locations
- Fixed Vaultwarden reverse proxy to point to new Docker Swarm service
- Removed old notification hub reference that was causing conflicts
- All services properly configured for external access via DuckDNS

BACKUP AND DISCOVERY:
- Created comprehensive backup system for all hosts
- Generated detailed discovery reports for infrastructure analysis
- Implemented automated backup validation scripts
- Created migration progress tracking and verification reports

MONITORING STACK:
- Deployed Prometheus, Grafana, and Blackbox monitoring
- Created infrastructure and system overview dashboards
- Added proper service discovery and alerting configuration
- Implemented performance monitoring for all critical services

DOCUMENTATION:
- Reorganized documentation into logical structure
- Created comprehensive migration playbook and troubleshooting guides
- Added hardware specifications and optimization recommendations
- Documented all configuration changes and service dependencies

CURRENT STATUS:
- Paperless services: ✅ Working and accessible externally
- Vaultwarden: ❌ PostgreSQL configuration issues; old instance still working
- Monitoring: ✅ Deployed and operational
- Caddy: ✅ Updated and working for external access
- PostgreSQL: ✅ Database running; Vaultwarden connection issues remain

NEXT STEPS:
- Continue troubleshooting Vaultwarden PostgreSQL configuration
- Consider alternative approaches for Vaultwarden migration
- Validate all external service access
- Complete final migration validation

TECHNICAL NOTES:
- Used Docker Swarm for orchestration on OMV800
- Implemented proper secret management for sensitive data
- Added comprehensive logging and monitoring
- Created automated backup and validation scripts
246 lines
5.8 KiB
JSON
246 lines
5.8 KiB
JSON
{
|
|
"title": "Infrastructure Overview",
|
|
"tags": ["infrastructure", "overview"],
|
|
"style": "dark",
|
|
"timezone": "browser",
|
|
"panels": [
|
|
{
|
|
"id": 1,
|
|
"title": "HTTP Service Health Status",
|
|
"type": "stat",
|
|
"targets": [
|
|
{
|
|
"expr": "probe_success{job=\"http-service-health\"}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"thresholds": {
|
|
"steps": [
|
|
{"color": "red", "value": 0},
|
|
{"color": "green", "value": 1}
|
|
]
|
|
},
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"0": {"text": "Down", "color": "red"},
|
|
"1": {"text": "Up", "color": "green"}
|
|
},
|
|
"type": "value"
|
|
}
|
|
],
|
|
"unit": "short"
|
|
}
|
|
},
|
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
}
|
|
},
|
|
{
|
|
"id": 2,
|
|
"title": "TCP Service Health Status",
|
|
"type": "stat",
|
|
"targets": [
|
|
{
|
|
"expr": "probe_success{job=\"tcp-service-health\"}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"thresholds": {
|
|
"steps": [
|
|
{"color": "red", "value": 0},
|
|
{"color": "green", "value": 1}
|
|
]
|
|
},
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"0": {"text": "Down", "color": "red"},
|
|
"1": {"text": "Up", "color": "green"}
|
|
},
|
|
"type": "value"
|
|
}
|
|
],
|
|
"unit": "short"
|
|
}
|
|
},
|
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
}
|
|
},
|
|
{
|
|
"id": 3,
|
|
"title": "HTTP Service Response Time",
|
|
"type": "timeseries",
|
|
"targets": [
|
|
{
|
|
"expr": "probe_duration_seconds{job=\"http-service-health\"}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"vis": false
|
|
},
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "never",
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"unit": "s"
|
|
}
|
|
},
|
|
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 8}
|
|
},
|
|
{
|
|
"id": 4,
|
|
"title": "HTTP Service Availability Summary",
|
|
"type": "stat",
|
|
"targets": [
|
|
{
|
|
"expr": "sum(probe_success{job=\"http-service-health\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"thresholds": {
|
|
"steps": [
|
|
{"color": "red", "value": 0},
|
|
{"color": "yellow", "value": 3},
|
|
{"color": "green", "value": 6}
|
|
]
|
|
},
|
|
"unit": "short"
|
|
}
|
|
},
|
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": ["lastNotNull"],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "auto"
|
|
}
|
|
},
|
|
{
|
|
"id": 5,
|
|
"title": "Service Details",
|
|
"type": "table",
|
|
"targets": [
|
|
{
|
|
"expr": "probe_success{job=~\"http-service-health|tcp-service-health\"}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"thresholds": {
|
|
"steps": [
|
|
{"color": "red", "value": 0},
|
|
{"color": "green", "value": 1}
|
|
]
|
|
},
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"0": {"text": "Offline", "color": "red"},
|
|
"1": {"text": "Online", "color": "green"}
|
|
},
|
|
"type": "value"
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
|
|
"transformations": [
|
|
{
|
|
"id": "organize",
|
|
"options": {
|
|
"excludeByName": {
|
|
"Time": true,
|
|
"__name__": true,
|
|
"job": true
|
|
},
|
|
"indexByName": {},
|
|
"renameByName": {
|
|
"Value": "Status",
|
|
"instance": "Service"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"refresh": "30s"
|
|
}
|