Major infrastructure migration and Vaultwarden PostgreSQL troubleshooting
COMPREHENSIVE CHANGES: INFRASTRUCTURE MIGRATION: - Migrated services to Docker Swarm on OMV800 (192.168.50.229) - Deployed PostgreSQL database for Vaultwarden migration - Updated all stack configurations for Docker Swarm compatibility - Added comprehensive monitoring stack (Prometheus, Grafana, Blackbox) - Implemented proper secret management for all services VAULTWARDEN POSTGRESQL MIGRATION: - Attempted migration from SQLite to PostgreSQL for NFS compatibility - Created PostgreSQL stack with proper user/password configuration - Built custom Vaultwarden image with PostgreSQL support - Troubleshot persistent SQLite fallback issue despite PostgreSQL config - Identified known issue where Vaultwarden silently falls back to SQLite - Added ENABLE_DB_WAL=false to prevent filesystem compatibility issues - Current status: Old Vaultwarden on lenovo410 still working, new one has config issues PAPERLESS SERVICES: - Successfully deployed Paperless-NGX and Paperless-AI on OMV800 - Both services running on ports 8000 and 3000 respectively - Caddy configuration updated for external access - Services accessible via paperless.pressmess.duckdns.org and paperless-ai.pressmess.duckdns.org CADDY CONFIGURATION: - Updated Caddyfile on Surface (192.168.50.254) for new service locations - Fixed Vaultwarden reverse proxy to point to new Docker Swarm service - Removed old notification hub reference that was causing conflicts - All services properly configured for external access via DuckDNS BACKUP AND DISCOVERY: - Created comprehensive backup system for all hosts - Generated detailed discovery reports for infrastructure analysis - Implemented automated backup validation scripts - Created migration progress tracking and verification reports MONITORING STACK: - Deployed Prometheus, Grafana, and Blackbox monitoring - Created infrastructure and system overview dashboards - Added proper service discovery and alerting configuration - Implemented performance monitoring for all critical services DOCUMENTATION: - Reorganized documentation into logical structure - Created comprehensive migration playbook and troubleshooting guides - Added hardware specifications and optimization recommendations - Documented all configuration changes and service dependencies CURRENT STATUS: - Paperless services: ✅ Working and accessible externally - Vaultwarden: ❌ PostgreSQL configuration issues, old instance still working - Monitoring: ✅ Deployed and operational - Caddy: ✅ Updated and working for external access - PostgreSQL: ✅ Database running, connection issues with Vaultwarden NEXT STEPS: - Continue troubleshooting Vaultwarden PostgreSQL configuration - Consider alternative approaches for Vaultwarden migration - Validate all external service access - Complete final migration validation TECHNICAL NOTES: - Used Docker Swarm for orchestration on OMV800 - Implemented proper secret management for sensitive data - Added comprehensive logging and monitoring - Created automated backup and validation scripts
This commit is contained in:
31
configs/monitoring/blackbox.yml
Normal file
31
configs/monitoring/blackbox.yml
Normal file
@@ -0,0 +1,31 @@
|
||||
modules:
|
||||
http_2xx:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
preferred_ip_protocol: "ip4"
|
||||
valid_status_codes: [200, 201, 202, 203, 204, 205, 206, 207, 208, 226]
|
||||
fail_if_ssl: false
|
||||
fail_if_not_ssl: false
|
||||
|
||||
http_post_2xx:
|
||||
prober: http
|
||||
timeout: 5s
|
||||
http:
|
||||
method: POST
|
||||
preferred_ip_protocol: "ip4"
|
||||
valid_status_codes: [200, 201, 202, 203, 204, 205, 206, 207, 208, 226]
|
||||
fail_if_ssl: false
|
||||
fail_if_not_ssl: false
|
||||
|
||||
tcp_connect:
|
||||
prober: tcp
|
||||
timeout: 5s
|
||||
tcp:
|
||||
preferred_ip_protocol: "ip4"
|
||||
|
||||
icmp:
|
||||
prober: icmp
|
||||
timeout: 5s
|
||||
icmp:
|
||||
preferred_ip_protocol: "ip4"
|
||||
245
configs/monitoring/dashboards/infrastructure-overview.json
Normal file
245
configs/monitoring/dashboards/infrastructure-overview.json
Normal file
@@ -0,0 +1,245 @@
|
||||
{
|
||||
"title": "Infrastructure Overview",
|
||||
"tags": ["infrastructure", "overview"],
|
||||
"style": "dark",
|
||||
"timezone": "browser",
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "HTTP Service Health Status",
|
||||
"type": "stat",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "probe_success{job=\"http-service-health\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "red", "value": 0},
|
||||
{"color": "green", "value": 1}
|
||||
]
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {"text": "Down", "color": "red"},
|
||||
"1": {"text": "Up", "color": "green"}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "TCP Service Health Status",
|
||||
"type": "stat",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "probe_success{job=\"tcp-service-health\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "red", "value": 0},
|
||||
{"color": "green", "value": 1}
|
||||
]
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {"text": "Down", "color": "red"},
|
||||
"1": {"text": "Up", "color": "green"}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
],
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Service Response Time",
|
||||
"type": "timeseries",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "probe_duration_seconds{job=\"http-service-health\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 24, "x": 0, "y": 8}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "HTTP Service Availability Summary",
|
||||
"type": "stat",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(probe_success{job=\"http-service-health\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "red", "value": 0},
|
||||
{"color": "yellow", "value": 3},
|
||||
{"color": "green", "value": 6}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Service Details",
|
||||
"type": "table",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "probe_success{job=~\"http-service-health|tcp-service-health\"}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "red", "value": 0},
|
||||
{"color": "green", "value": 1}
|
||||
]
|
||||
},
|
||||
"mappings": [
|
||||
{
|
||||
"options": {
|
||||
"0": {"text": "Offline", "color": "red"},
|
||||
"1": {"text": "Online", "color": "green"}
|
||||
},
|
||||
"type": "value"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"excludeByName": {
|
||||
"Time": true,
|
||||
"__name__": true,
|
||||
"job": true
|
||||
},
|
||||
"indexByName": {},
|
||||
"renameByName": {
|
||||
"Value": "Status",
|
||||
"instance": "Service"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"refresh": "30s"
|
||||
}
|
||||
316
configs/monitoring/dashboards/system-overview.json
Normal file
316
configs/monitoring/dashboards/system-overview.json
Normal file
@@ -0,0 +1,316 @@
|
||||
{
|
||||
"title": "System Overview",
|
||||
"tags": ["system", "infrastructure", "overview"],
|
||||
"style": "dark",
|
||||
"timezone": "browser",
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "CPU Usage",
|
||||
"type": "timeseries",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 - (avg by (instance) (irate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
|
||||
"refId": "A",
|
||||
"legendFormat": "CPU %"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100",
|
||||
"refId": "A",
|
||||
"legendFormat": "Memory %"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Disk Usage",
|
||||
"type": "timeseries",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"rootfs\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"rootfs\"})) * 100",
|
||||
"refId": "A",
|
||||
"legendFormat": "Disk %"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Network I/O",
|
||||
"type": "timeseries",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(node_network_receive_bytes_total{device!=\"lo\"}[5m])",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{device}} - RX"
|
||||
},
|
||||
{
|
||||
"expr": "rate(node_network_transmit_bytes_total{device!=\"lo\"}[5m])",
|
||||
"refId": "B",
|
||||
"legendFormat": "{{device}} - TX"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "System Load",
|
||||
"type": "timeseries",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "node_load1",
|
||||
"refId": "A",
|
||||
"legendFormat": "1m Load"
|
||||
},
|
||||
{
|
||||
"expr": "node_load5",
|
||||
"refId": "B",
|
||||
"legendFormat": "5m Load"
|
||||
},
|
||||
{
|
||||
"expr": "node_load15",
|
||||
"refId": "C",
|
||||
"legendFormat": "15m Load"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 10,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"vis": false
|
||||
},
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "never",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "System Info",
|
||||
"type": "stat",
|
||||
"targets": [
|
||||
{
|
||||
"expr": "node_uname_info",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{"color": "green", "value": 0}
|
||||
]
|
||||
},
|
||||
"mappings": [],
|
||||
"unit": "short"
|
||||
}
|
||||
},
|
||||
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": ["lastNotNull"],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
}
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"refresh": "30s"
|
||||
}
|
||||
12
configs/monitoring/grafana-dashboards.yml
Normal file
12
configs/monitoring/grafana-dashboards.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: 'default'
|
||||
orgId: 1
|
||||
folder: ''
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /etc/grafana/provisioning/dashboards
|
||||
14
configs/monitoring/grafana-datasources.yml
Normal file
14
configs/monitoring/grafana-datasources.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://192.168.50.229:9091
|
||||
isDefault: true
|
||||
editable: true
|
||||
jsonData:
|
||||
timeInterval: "15s"
|
||||
queryTimeout: "60s"
|
||||
httpMethod: "POST"
|
||||
secureJsonData: {}
|
||||
70
configs/monitoring/prometheus-production.yml
Normal file
70
configs/monitoring/prometheus-production.yml
Normal file
@@ -0,0 +1,70 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
# Prometheus itself
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
# Blackbox exporter
|
||||
- job_name: 'blackbox'
|
||||
static_configs:
|
||||
- targets: ['192.168.50.229:9115']
|
||||
|
||||
# Node exporter - system metrics
|
||||
- job_name: 'node-exporter'
|
||||
static_configs:
|
||||
- targets: ['192.168.50.229:9100']
|
||||
scrape_interval: 30s
|
||||
|
||||
# Docker Swarm services that expose metrics
|
||||
- job_name: 'docker-swarm-metrics'
|
||||
static_configs:
|
||||
- targets:
|
||||
- '192.168.50.229:9091' # Prometheus
|
||||
- '192.168.50.229:3002' # Grafana
|
||||
scrape_interval: 30s
|
||||
|
||||
# HTTP service health checks via blackbox exporter
|
||||
- job_name: 'http-service-health'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [http_2xx]
|
||||
static_configs:
|
||||
- targets:
|
||||
- 'http://192.168.50.229:8000' # Paperless-NGX
|
||||
- 'http://192.168.50.229:3000' # Paperless-AI
|
||||
- 'http://192.168.50.229:8081' # Nextcloud
|
||||
- 'http://192.168.50.181:8123' # Home Assistant
|
||||
- 'http://192.168.50.181:9000' # Portainer
|
||||
- 'http://192.168.50.66:9080' # AppFlowy
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 192.168.50.229:9115
|
||||
scrape_interval: 60s
|
||||
|
||||
# TCP service health checks via blackbox exporter
|
||||
- job_name: 'tcp-service-health'
|
||||
metrics_path: /probe
|
||||
params:
|
||||
module: [tcp_connect]
|
||||
static_configs:
|
||||
- targets:
|
||||
- '192.168.50.229:6379' # Redis
|
||||
- '192.168.50.229:5432' # PostgreSQL
|
||||
- '192.168.50.229:3306' # MariaDB
|
||||
- '192.168.50.229:1883' # Mosquitto
|
||||
relabel_configs:
|
||||
- source_labels: [__address__]
|
||||
target_label: __param_target
|
||||
- source_labels: [__param_target]
|
||||
target_label: instance
|
||||
- target_label: __address__
|
||||
replacement: 192.168.50.229:9115
|
||||
scrape_interval: 60s
|
||||
47
configs/monitoring/prometheus-simple.yml
Normal file
47
configs/monitoring/prometheus-simple.yml
Normal file
@@ -0,0 +1,47 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
# Prometheus itself
|
||||
- job_name: 'prometheus'
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
|
||||
# Docker Swarm services (basic)
|
||||
- job_name: 'docker-swarm-services'
|
||||
static_configs:
|
||||
- targets:
|
||||
- '192.168.50.229:9090' # Prometheus
|
||||
- '192.168.50.229:3000' # Grafana
|
||||
- '192.168.50.229:6379' # Redis
|
||||
- '192.168.50.229:5432' # PostgreSQL
|
||||
- '192.168.50.229:3306' # MariaDB
|
||||
- '192.168.50.229:1883' # Mosquitto
|
||||
scrape_interval: 30s
|
||||
|
||||
# Infrastructure nodes
|
||||
- job_name: 'infrastructure-nodes'
|
||||
static_configs:
|
||||
- targets:
|
||||
- '192.168.50.229:22' # OMV800
|
||||
- '192.168.50.254:22' # Surface
|
||||
- '192.168.50.181:22' # jonathan-2518f5u
|
||||
- '192.168.50.66:22' # lenovo420
|
||||
- '192.168.50.145:22' # audrey
|
||||
- '192.168.50.225:22' # fedora
|
||||
scrape_interval: 60s
|
||||
|
||||
# Application services
|
||||
- job_name: 'application-services'
|
||||
static_configs:
|
||||
- targets:
|
||||
- '192.168.50.229:8081' # Nextcloud
|
||||
- '192.168.50.229:8000' # Paperless-NGX
|
||||
- '192.168.50.229:3000' # Paperless-AI
|
||||
- '192.168.50.181:8123' # Home Assistant
|
||||
- '192.168.50.181:9000' # Portainer
|
||||
- '192.168.50.181:5678' # n8n
|
||||
- '192.168.50.66:9080' # AppFlowy
|
||||
- '192.168.50.254:80' # Caddy
|
||||
scrape_interval: 30s
|
||||
Reference in New Issue
Block a user