Initial commit
This commit is contained in:
190
playbooks/extend-monitoring.yml
Normal file
190
playbooks/extend-monitoring.yml
Normal file
@@ -0,0 +1,190 @@
|
||||
---
# Extends the Netdata parent node so that it also watches devices that do not
# run a Netdata agent themselves: it renders a go.d "ping" job file, installs
# a cron-driven script that lists Docker containers on remote hosts over SSH,
# drops a placeholder Windows/WMI config, restarts Netdata and writes a
# Markdown cheat-sheet of dashboard links.
- name: Extend monitoring to non-Linux devices
  hosts: fedora  # Run on parent node
  become: true
  vars:
    # Devices probed by the "home_network_ping" job rendered below.
    # Only `ip` and `name` are consumed by this play; `type` is informational.
    network_devices:
      - name: "Gateway Router"
        ip: "192.168.50.1"
        type: "router"
      - name: "Immich Photos"
        ip: "192.168.50.66"
        type: "docker_host"
      - name: "Unknown Device 1"
        ip: "192.168.50.20"
        type: "unknown"
      - name: "Unknown Device 2"
        ip: "192.168.50.25"
        type: "unknown"

    # Base URL used in the generated links. Falls back to the inventory name
    # when the inventory does not define `ansible_host` for this host.
    netdata_dashboard_url: "http://{{ ansible_host | default(inventory_hostname) }}:19999"

  tasks:
    # Renders the go.d ping collector job file.
    # - The Jinja loop tags sit at the block scalar's base indentation so they
    #   contribute no leading whitespace to the rendered list items.
    # - The collector is configured with `update_every` (seconds between
    #   collections) and `packets` (echo requests per collection). The former
    #   `interval` / `count` / `timeout` keys were replaced accordingly:
    #   `interval` is the gap between packets, not the polling period, and the
    #   collector has no `count` or `timeout` option.
    - name: Create network monitoring configuration
      copy:
        content: |
          # Network Device Monitoring Configuration
          # Add to /etc/netdata/go.d/ping.conf

          jobs:
            - name: home_network_ping
              hosts:
          {% for device in network_devices %}
                - {{ device.ip }}  # {{ device.name }}
          {% endfor %}
              update_every: 30
              packets: 3

            - name: critical_services_ping
              hosts:
                - 192.168.50.1  # Gateway
                - 192.168.50.66  # Immich Photos
                - 8.8.8.8  # Google DNS
                - 1.1.1.1  # Cloudflare DNS
              update_every: 10
              packets: 1
        dest: /etc/netdata/go.d/ping.conf
        owner: root
        group: root
        mode: "0644"

    # Ensures a `[plugin:go.d]` section header exists in netdata.conf (also
    # replaces a commented-out header, since the pattern allows any prefix).
    # The dot in "go.d" is escaped so it only matches a literal dot.
    - name: Enable ping monitoring in Netdata
      lineinfile:
        path: /etc/netdata/netdata.conf
        regexp: '^.*\[plugin:go\.d\]'
        line: '[plugin:go.d]'
        create: true

    # Appends a managed block to netdata.conf.
    # NOTE(review): together with the task above this leaves two
    # `[plugin:go.d]` headers in the file, and go.d modules are normally
    # toggled under `modules:` in /etc/netdata/go.d.conf rather than here -
    # confirm this block has the intended effect.
    - name: Configure ping plugin
      blockinfile:
        path: /etc/netdata/netdata.conf
        marker: "# {mark} PING MONITORING CONFIG"
        block: |
          [plugin:go.d]
            ping = yes

    # Installs the helper script run by the cron job below.
    # The docker `--format` Go template is wrapped in {% raw %} because
    # `content` is passed through Jinja2, which would otherwise try to
    # evaluate `{{.Names}}` and abort the task with a template syntax error.
    # NOTE(review): StrictHostKeyChecking=no disables host-key verification,
    # and the SSH user is "jon" while the dashboard file below is owned by
    # "jonathan" - confirm both are intentional.
    - name: Create Docker monitoring script for remote hosts
      copy:
        content: |
          #!/bin/bash
          # Monitor Docker containers on remote hosts

          REMOTE_HOSTS=(
            "192.168.50.66:photos.local"  # Immich Photos
            "100.78.26.112:omv800"        # OMV with Docker
            "100.98.144.95:lenovo420"     # Lenovo with Docker
          )

          for host_info in "${REMOTE_HOSTS[@]}"; do
            IFS=':' read -r ip hostname <<< "$host_info"
            echo "=== Docker containers on $hostname ($ip) ==="

            # Try to get container stats via SSH
            if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "jon@$ip" \
              "docker ps --format 'table {% raw %}{{.Names}}\t{{.Status}}\t{{.Ports}}{% endraw %}'" \
              2>/dev/null; then
              echo "SSH connection successful"
            else
              echo "SSH connection failed - checking if HTTP monitoring available"
              # Check for cAdvisor or similar
              if curl -s --connect-timeout 3 "http://$ip:8080/metrics" >/dev/null 2>&1; then
                echo "cAdvisor metrics available at http://$ip:8080"
              fi
            fi
            echo ""
          done
        dest: /usr/local/bin/check-remote-docker.sh
        owner: root
        group: root
        mode: "0755"

    # Runs the script every five minutes and appends its output to a log.
    # The entry lands in the crontab of the user the play becomes.
    - name: Add cron job for Docker monitoring
      cron:
        name: "Check remote Docker containers"
        minute: "*/5"
        job: "/usr/local/bin/check-remote-docker.sh >> /var/log/remote-docker-check.log 2>&1"

    # Writes a reference config; the WMI part is a commented-out example.
    # NOTE(review): nothing in this play makes Netdata load this file -
    # confirm whether it is meant to be documentation only.
    - name: Create Windows monitoring via WMI (if available)
      copy:
        content: |
          # Windows monitoring configuration
          # Requires wmic or PowerShell remoting to be enabled

          [plugin:apps]
            # Disable apps monitoring to reduce load
            enabled = no

          # Example Windows WMI monitoring (requires additional setup)
          # [plugin:wmi]
          #   command = wmic -U domain/user%password //192.168.50.100 "SELECT * FROM Win32_Processor"
          #   update every = 60
        dest: /etc/netdata/windows-monitoring.conf
        owner: root
        group: root
        mode: "0644"

    # Smoke-tests the ping module in debug mode. go.d.plugin keeps collecting
    # until it is stopped, so it is bounded with timeout(1); exit status 124
    # (timed out) is therefore the expected outcome and not a failure. The
    # task only reads state, hence `changed_when: false`.
    - name: Test ping monitoring
      command: >-
        timeout 15s
        /usr/libexec/netdata/plugins.d/go.d.plugin -m ping -d
      register: ping_test
      changed_when: false
      failed_when: ping_test.rc not in [0, 124]
      ignore_errors: true

    - name: Display ping test results
      debug:
        var: ping_test.stdout_lines

    # Unconditional restart so the new job file and netdata.conf edits load.
    - name: Restart Netdata to apply changes
      systemd:
        name: netdata
        state: restarted

    # Writes a Markdown cheat-sheet of dashboard links. The host loop
    # tolerates a missing `all_linux` inventory group by iterating over an
    # empty list.
    # NOTE(review): the device counts in this text are hard-coded and are not
    # derived from `network_devices`.
    - name: Create monitoring dashboard links
      copy:
        content: |
          # Extended Monitoring Dashboard Links

          ## Main Netdata Dashboard
          {{ netdata_dashboard_url }}

          ## Network Ping Monitoring
          {{ netdata_dashboard_url }}/#menu_ping_submenu_home_network_ping

          ## System Overview
          {{ netdata_dashboard_url }}/#menu_system_submenu_cpu

          ## All Connected Hosts
          {% for host in groups['all_linux'] | default([]) %}
          - {{ host }}: {{ netdata_dashboard_url }}/host/{{ host }}
          {% endfor %}

          ## Discovered Network Devices (52 total)
          - Gateway: 192.168.50.1
          - Photos Server: 192.168.50.66
          - 50 other devices (IoT, mobile, Windows, etc.)

          ## Monitoring Capabilities by Device Type:

          ### ✅ Full Monitoring (Linux hosts with Netdata)
          - fedora (parent)
          - lenovo420, omv800, lenovo, surface, omvbackup (children)

          ### 📊 Limited Monitoring (ping, port checks)
          - Router/gateway
          - Docker hosts
          - Network appliances

          ### ⚠️ Manual Monitoring Required
          - Windows machines (need WMI/SNMP setup)
          - Mobile devices (battery optimization prevents agents)
          - IoT devices (resource constrained)
        dest: /home/jonathan/monitoring-dashboard.md
        owner: jonathan
        group: jonathan
        mode: "0644"

    # NOTE(review): the ping jobs rendered above cover the entries in
    # `network_devices` plus the critical-services list, not 52 devices -
    # confirm the wording of this summary.
    - name: Summary of monitoring coverage
      debug:
        msg: |
          Monitoring Coverage Summary:

          ✅ Linux hosts: 6/6 (100%) - Full system monitoring
          📊 Network devices: 52 discovered - Ping monitoring added
          🐳 Docker containers: Remote monitoring via SSH/API
          📱 Mobile/IoT: 46+ devices - Network connectivity only

          Total network coverage: Ping monitoring for all 52 devices
          Detailed monitoring: 6 Linux systems (infrastructure core)

          Dashboard: {{ netdata_dashboard_url }}
|
||||
Reference in New Issue
Block a user