Initial commit

This commit is contained in:
admin
2025-08-24 11:13:39 -04:00
commit fb869f1131
168 changed files with 47986 additions and 0 deletions

View File

@@ -0,0 +1,96 @@
# Netdata Parent-Child Configuration
## Architecture Overview
This playbook configures a centralized monitoring setup using Netdata's parent-child streaming:
- **Parent Node**: Fedora workstation (192.168.1.243) - Stores all metrics, provides dashboard
- **Child Nodes**: All other hosts - Stream metrics to parent, minimal local storage
## Current Installation Status
Based on the latest check:
### ✅ Already Running
- `lenovo420` - Fedora workstation (1.7GB memory usage)
- `omv800` - OpenMediaVault server (561MB memory usage)
- `lenovo` - Lenovo laptop (438MB memory usage)
### ❌ Need Installation
- `surface` - Surface tablet
- `fedora` - **Target parent node**
- `omvbackup` - Backup server
### ⚠️ Unreachable
- `audrey` - Connection timeout (may be offline)
## Resource Usage Benefits
**Before (current standalone instances):**
- Total memory: ~2.7GB across 3 nodes
- Each node stores full retention
- Duplicated web interfaces
**After (parent-child setup):**
- Parent: ~512MB (centralized storage + web UI)
- Children: ~32MB each (streaming only)
- Total memory: ~640MB (76% reduction)
## Running the Playbook
```bash
# Configure all nodes
ansible-playbook -i inventory.ini playbooks/netdata-parent-child.yml
# Configure only specific hosts
ansible-playbook -i inventory.ini playbooks/netdata-parent-child.yml --limit fedora,surface
# Test connectivity after setup
ansible-playbook -i inventory.ini playbooks/netdata-parent-child.yml --tags verify
```
## Configuration Details
### Parent Node (fedora)
- Binds to `*:19999` (accessible from network)
- 2GB disk space for metrics storage
- Receives streams from all children
- Provides centralized dashboard
### Child Nodes (all others)
- Memory mode: `none` (no local storage)
- Binds to `localhost:19999` only
- Streams all metrics to parent
- Minimal resource footprint
## Accessing the Dashboard
After configuration:
- **Central Dashboard**: http://192.168.1.243:19999
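- **Individual Node Access**: http://localhost:19999, reachable only from the node itself (children bind to localhost, so `http://node-ip:19999` will not answer from other machines)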
## Security Features
- API key authentication for streaming
- Network restrictions (home + Tailscale networks only)
- Local-only access on child nodes
- Firewall rules automatically configured
## Troubleshooting
### Check stream status on parent:
```bash
curl -s http://192.168.1.243:19999/api/v1/info | jq '.mirrored_hosts'
```
### Check streaming on child:
```bash
systemctl status netdata
journalctl -u netdata -f
```
### Verify connectivity:
```bash
# From child node
curl -I http://192.168.1.243:19999/api/v1/info
```

View File

@@ -0,0 +1,190 @@
---
# Play: add ping- and SSH-based checks for devices that do not stream to Netdata.
# All tasks of this play run on the `fedora` host with privilege escalation.
- name: Extend monitoring to non-Linux devices
  become: true
  hosts: fedora  # Run on parent node
  vars:
    # Devices whose addresses are rendered into the ping collector job list.
    # Each entry carries a display name, an IP address and a free-form type tag.
    network_devices:
      - { name: "Gateway Router", ip: "192.168.50.1", type: "router" }
      - { name: "Immich Photos", ip: "192.168.50.66", type: "docker_host" }
      - { name: "Unknown Device 1", ip: "192.168.50.20", type: "unknown" }
      - { name: "Unknown Device 2", ip: "192.168.50.25", type: "unknown" }
  tasks:
# Writes the go.d ping collector job file. The first job's host list is rendered
# from the play variable `network_devices`; the second job is a fixed list.
#
# Option names follow the go.d ping collector: `update_every` is the collection
# period in seconds and `packets` is the number of echo requests per run.
# (`interval` in that collector is the gap between packets, not the period, and
# `count` is not one of its options, so the earlier spelling did not do what the
# values suggested.)
# NOTE(review): the previous per-job `timeout` values (1s / 2s) have no obvious
# equivalent option in the collector - confirm against the collector docs.
#
# The Jinja loop tags are kept at the left edge of the content on purpose:
# whitespace in front of a `{% ... %}` line would be emitted into the rendered
# file and break the YAML indentation of the host list.
    - name: Create network monitoring configuration
      ansible.builtin.copy:
        dest: /etc/netdata/go.d/ping.conf
        mode: '0644'
        content: |
          # Network Device Monitoring Configuration
          # Add to /etc/netdata/go.d/ping.conf
          jobs:
            - name: home_network_ping
              update_every: 30
              packets: 3
              hosts:
          {% for device in network_devices %}
                - {{ device.ip }}  # {{ device.name }}
          {% endfor %}
            - name: critical_services_ping
              update_every: 10
              packets: 1
              hosts:
                - 192.168.50.1   # Gateway
                - 192.168.50.66  # Immich Photos
                - 8.8.8.8        # Google DNS
                - 1.1.1.1        # Cloudflare DNS
# Ensures the go.d plugin - the process that runs the ping collector - is switched
# on in netdata.conf.
#
# This replaces an earlier pair of tasks that first appended a bare
# `[plugin:go.d]` header and then appended a second `[plugin:go.d]` header with
# `ping = yes`. That produced a duplicated section, and `ping = yes` is not a
# setting of a `[plugin:...]` section; individual go.d collectors are picked up
# from their job files under /etc/netdata/go.d/ (and toggled in go.d.conf).
# The same marker is reused so a block left behind by the old task is rewritten
# in place instead of being duplicated.
    - name: Enable ping monitoring in Netdata
      ansible.builtin.blockinfile:
        path: /etc/netdata/netdata.conf
        create: true
        marker: "# {mark} PING MONITORING CONFIG"
        block: |
          [plugins]
              go.d = yes
# Installs /usr/local/bin/check-remote-docker.sh, a helper that loops over a fixed
# list of "ip:hostname" pairs, tries `docker ps` over SSH on each, and falls back
# to probing http://<ip>:8080/metrics when SSH fails.
#
# `copy: content` is passed through Jinja2 before it is written. The Go-template
# placeholders of `docker ps --format` ({{.Names}} etc.) use the same delimiters
# as Jinja2 and would abort the task with a template syntax error, so that part
# of the line is wrapped in {% raw %} ... {% endraw %} and reaches the script
# unchanged.
# NOTE(review): the script disables SSH host key checking and logs in as `jon`;
# confirm both are intended.
    - name: Create Docker monitoring script for remote hosts
      ansible.builtin.copy:
        dest: /usr/local/bin/check-remote-docker.sh
        mode: '0755'
        content: |
          #!/bin/bash
          # Monitor Docker containers on remote hosts
          REMOTE_HOSTS=(
              "192.168.50.66:photos.local"  # Immich Photos
              "100.78.26.112:omv800"        # OMV with Docker
              "100.98.144.95:lenovo420"     # Lenovo with Docker
          )
          for host_info in "${REMOTE_HOSTS[@]}"; do
              IFS=':' read -r ip hostname <<< "$host_info"
              echo "=== Docker containers on $hostname ($ip) ==="
              # Try to get container stats via SSH
              if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "jon@$ip" "docker ps --format 'table {% raw %}{{.Names}}\t{{.Status}}\t{{.Ports}}{% endraw %}'" 2>/dev/null; then
                  echo "SSH connection successful"
              else
                  echo "SSH connection failed - checking if HTTP monitoring available"
                  # Check for cAdvisor or similar
                  if curl -s --connect-timeout 3 "http://$ip:8080/metrics" >/dev/null 2>&1; then
                      echo "cAdvisor metrics available at http://$ip:8080"
                  fi
              fi
              echo ""
          done
# Registers a cron entry that runs the remote Docker check every fifth minute and
# appends both stdout and stderr to /var/log/remote-docker-check.log.
    - name: Add cron job for Docker monitoring
      ansible.builtin.cron:
        job: "/usr/local/bin/check-remote-docker.sh >> /var/log/remote-docker-check.log 2>&1"
        minute: "*/5"
        name: "Check remote Docker containers"
# Drops a reference file with a disabled `[plugin:apps]` section and a commented
# out WMI example.
# NOTE(review): nothing visible in this play includes this file from netdata.conf -
# confirm whether Netdata actually reads it.
    - name: Create Windows monitoring via WMI (if available)
      ansible.builtin.copy:
        dest: /etc/netdata/windows-monitoring.conf
        content: |
          # Windows monitoring configuration
          # Requires wmic or PowerShell remoting to be enabled
          [plugin:apps]
          # Disable apps monitoring to reduce load
          enabled = no
          # Example Windows WMI monitoring (requires additional setup)
          # [plugin:wmi]
          # command = wmic -U domain/user%password //192.168.50.100 "SELECT * FROM Win32_Processor"
          # update every = 60
# Smoke-tests the ping collector by running the go.d plugin in debug mode and
# printing what it wrote to stdout.
#
# In debug mode the plugin keeps collecting until it is interrupted, so an
# unbounded invocation never returns and stalls the play. `timeout 15` stops it
# after 15 seconds; exit status 124 is how `timeout` reports that it had to do
# so and is therefore treated as a normal outcome. The check never changes the
# host, hence `changed_when: false`. Any other failure is still tolerated, as
# before, so a broken test cannot block the rest of the play.
    - name: Test ping monitoring
      ansible.builtin.command:
        argv:
          - timeout
          - "15"
          - /usr/libexec/netdata/plugins.d/go.d.plugin
          - "-m"
          - ping
          - "-d"
      register: ping_test
      changed_when: false
      failed_when: ping_test.rc not in [0, 124]
      ignore_errors: yes
    - name: Display ping test results
      ansible.builtin.debug:
        var: ping_test.stdout_lines
# Restarts the netdata service unconditionally so the files written above are loaded.
    - name: Restart Netdata to apply changes
      ansible.builtin.systemd:
        state: restarted
        name: netdata
# Writes a Markdown cheat-sheet of dashboard URLs into jonathan's home directory.
# URLs are built from `ansible_host`; the per-host list iterates the inventory
# group `all_linux`. The loop tags sit at the left edge of the content so they
# add no stray indentation to the rendered list.
    - name: Create monitoring dashboard links
      ansible.builtin.copy:
        dest: /home/jonathan/monitoring-dashboard.md
        owner: jonathan
        group: jonathan
        content: |
          # Extended Monitoring Dashboard Links
          ## Main Netdata Dashboard
          http://{{ ansible_host }}:19999
          ## Network Ping Monitoring
          http://{{ ansible_host }}:19999/#menu_ping_submenu_home_network_ping
          ## System Overview
          http://{{ ansible_host }}:19999/#menu_system_submenu_cpu
          ## All Connected Hosts
          {% for host in groups['all_linux'] %}
          - {{ host }}: http://{{ ansible_host }}:19999/host/{{ host }}
          {% endfor %}
          ## Discovered Network Devices (52 total)
          - Gateway: 192.168.50.1
          - Photos Server: 192.168.50.66
          - 50 other devices (IoT, mobile, Windows, etc.)
          ## Monitoring Capabilities by Device Type:
          ### ✅ Full Monitoring (Linux hosts with Netdata)
          - fedora (parent)
          - lenovo420, omv800, lenovo, surface, omvbackup (children)
          ### 📊 Limited Monitoring (ping, port checks)
          - Router/gateway
          - Docker hosts
          - Network appliances
          ### ⚠️ Manual Monitoring Required
          - Windows machines (need WMI/SNMP setup)
          - Mobile devices (battery optimization prevents agents)
          - IoT devices (resource constrained)
# Prints a fixed coverage summary at the end of the play; only the dashboard URL
# is templated (from `ansible_host`), the figures are static text.
    - name: Summary of monitoring coverage
      ansible.builtin.debug:
        msg: |
          Monitoring Coverage Summary:
          ✅ Linux hosts: 6/6 (100%) - Full system monitoring
          📊 Network devices: 52 discovered - Ping monitoring added
          🐳 Docker containers: Remote monitoring via SSH/API
          📱 Mobile/IoT: 46+ devices - Network connectivity only
          Total network coverage: Ping monitoring for all 52 devices
          Detailed monitoring: 6 Linux systems (infrastructure core)
          Dashboard: http://{{ ansible_host }}:19999

View File

@@ -0,0 +1,13 @@
---
# Netdata Configuration

# Shared secret children present when streaming to the parent.
# Netdata only accepts API keys that are GUIDs, so the readable passphrase is
# converted into a deterministic UUID with Ansible's `to_uuid` filter; every
# host therefore derives the same key from the same passphrase.
# NOTE(review): this is a credential committed in clear text - consider moving
# the passphrase into ansible-vault.
netdata_api_key: "{{ 'homelab-stream-key-2024-secure' | to_uuid }}"
netdata_parent_retention_hours: 168  # 7 days
netdata_child_retention_hours: 1     # 1 hour local buffer

# Monitoring Settings
netdata_memory_limit_mb: 128  # Memory limit for child nodes
netdata_disk_space_mb: 512    # Disk space limit for child nodes

# Network Configuration
home_network_cidr: "192.168.1.0/24"
tailscale_network_cidr: "100.64.0.0/10"

View File

@@ -0,0 +1,198 @@
---
# Play: install Netdata where needed, configure the host named `fedora` as the
# streaming parent and every other host as a streaming child, then verify.
- name: Configure Netdata Parent-Child Streaming Architecture
  hosts: all
  become: yes
  vars:
    # Address children stream to and test connectivity against.
    netdata_parent_host: "192.168.50.225"  # Fedora workstation wired IP (more stable)
    # Streaming API key used on both sides. Netdata rejects keys that are not
    # GUIDs, so a value that does not already look like a UUID (whether it comes
    # from `netdata_api_key` or from the fallback passphrase) is converted into a
    # deterministic UUID with `to_uuid`. A value that is already a UUID is used
    # unchanged.
    netdata_stream_api_key: >-
      {%- set raw_key = netdata_api_key | default('homelab-stream-key-2024') -%}
      {{ raw_key if raw_key is match('^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$') else (raw_key | to_uuid) }}
  tasks:
# Installs the Netdata agent, but only on the three hosts named in the `when`
# list. RedHat- and Debian-family hosts use their package manager; anything else
# falls back to the upstream kickstart script.
#
# The fallback runs under bash with `pipefail`, and curl is called with
# `-f` (fail on HTTP errors) plus `-sS` (quiet, but still print errors). Without
# these, a failed download fed an empty script to bash, the pipeline exited 0
# and the task reported success although nothing had been installed.
# NOTE(review): `creates: /usr/sbin/netdata` assumes the script installs to that
# path - confirm for the install type kickstart chooses on these hosts.
    - name: Install Netdata on hosts where it's missing
      when: inventory_hostname in ['surface', 'fedora', 'omvbackup']
      block:
        - name: Install Netdata via package manager (Fedora/RHEL)
          ansible.builtin.dnf:
            name: netdata
            state: present
          when: ansible_os_family == "RedHat"

        - name: Install Netdata via package manager (Debian/Ubuntu)
          ansible.builtin.apt:
            name: netdata
            state: present
            update_cache: yes
          when: ansible_os_family == "Debian"

        - name: Install Netdata via installer script (fallback)
          ansible.builtin.shell: |
            set -o pipefail
            curl -fsSL https://get.netdata.cloud/kickstart.sh | bash -s -- --stable-channel --disable-telemetry --dont-wait
          args:
            creates: /usr/sbin/netdata
            executable: /bin/bash
          when: ansible_os_family not in ["RedHat", "Debian"]
# Parent-only block (runs where inventory_hostname is `fedora`): writes the
# agent configuration, the streaming receiver configuration, (re)starts the
# service and opens TCP 19999 in the firewall.
#
# Changes compared with the earlier single-file version:
#   * Streaming settings are written to /etc/netdata/stream.conf. Netdata reads
#     `[stream]` and the per-API-key section from that file, not from
#     netdata.conf, so the receiver was never actually configured.
#   * The parent's own `[stream]` section is disabled: it only receives, and an
#     enabled sender with an empty destination has nothing to connect to.
#   * `bind to` / `allow connections from` are placed under `[web]`.
#   * Access lists use Netdata simple patterns instead of CIDR notation, which
#     Netdata matches as literal text. 100.64.0.0/10 is spelled out as
#     100.64.* .. 100.127.*; `localhost` is included so local checks still work.
#   * `default memory` is corrected to `default memory mode`.
#   * The service is restarted only when one of the two files changed;
#     otherwise it is merely ensured to be enabled and running.
# NOTE(review): stream.conf is written with group `netdata`, which assumes the
# package created that group - confirm on the target distribution.
    - name: Configure Netdata Parent (Central monitoring node)
      when: inventory_hostname == 'fedora'
      vars:
        # Space-separated simple patterns for home LAN + Tailscale clients.
        netdata_allowed_clients: >-
          localhost 192.168.50.*
          {% for octet in range(64, 128) %}100.{{ octet }}.* {% endfor %}
      block:
        - name: Create Netdata parent configuration
          ansible.builtin.copy:
            dest: /etc/netdata/netdata.conf
            backup: yes
            content: |
              [global]
                  # Parent node configuration
                  memory mode = dbengine
                  page cache size = 32
                  dbengine disk space = 2048

              # Disable local data collection plugins to reduce load
              [plugins]
                  apps = no
                  cgroups = no
                  diskspace = no

              [web]
                  # Network settings
                  bind to = *:19999
                  allow connections from = {{ netdata_allowed_clients | trim }}
                  # Allow dashboard from any host on home network
                  allow dashboard from = {{ netdata_allowed_clients | trim }}
          register: parent_netdata_conf

        - name: Create Netdata parent streaming configuration
          ansible.builtin.copy:
            dest: /etc/netdata/stream.conf
            backup: yes
            owner: root
            group: netdata
            mode: '0640'
            content: |
              # Stream configuration for receiving child data
              [stream]
                  # The parent does not forward metrics anywhere
                  enabled = no

              # Accept streams from child nodes
              [{{ netdata_stream_api_key }}]
                  enabled = yes
                  allow from = {{ netdata_allowed_clients | trim }}
                  default history = 3600
                  default memory mode = dbengine
                  health enabled by default = auto
          register: parent_stream_conf

        - name: Enable and start Netdata parent
          ansible.builtin.systemd:
            name: netdata
            enabled: yes
            state: "{{ 'restarted' if (parent_netdata_conf is changed or parent_stream_conf is changed) else 'started' }}"

        # Both firewall tasks are best-effort: whichever firewall front-end is
        # not present on the host fails and is ignored.
        - name: Open firewall for Netdata (firewalld)
          firewalld:
            port: 19999/tcp
            permanent: yes
            state: enabled
            immediate: yes
          ignore_errors: yes

        - name: Open firewall for Netdata (ufw)
          ufw:
            rule: allow
            port: '19999'
            proto: tcp
          ignore_errors: yes
# Child block (every host except `fedora`): keeps no local metric storage,
# exposes the local web port on localhost only and streams to the parent at
# `netdata_parent_host`, authenticating with `netdata_stream_api_key`.
#
# Changes compared with the earlier single-file version:
#   * The `[stream]` section is written to /etc/netdata/stream.conf, which is
#     where Netdata reads sender settings from; inside netdata.conf it had no
#     effect and nothing was streamed.
#   * `bind to` is placed under `[web]` together with the dashboard restriction.
#   * The service is restarted only when one of the two files changed;
#     otherwise it is merely ensured to be running.
# NOTE(review): stream.conf is written with group `netdata`, which assumes the
# package created that group - confirm on the target distribution.
    - name: Configure Netdata Children (streaming to parent)
      when: inventory_hostname != 'fedora'
      block:
        - name: Configure Netdata child to stream to parent
          ansible.builtin.copy:
            dest: /etc/netdata/netdata.conf
            backup: yes
            content: |
              [global]
                  # Child node - minimal local storage
                  memory mode = none

              [web]
                  # Network settings
                  bind to = localhost:19999
                  # Restrict dashboard access to localhost only
                  allow dashboard from = localhost

              # Reduce resource usage
              [plugins]
                  # Keep essential monitoring but reduce frequency
                  proc = yes
                  diskspace = yes
                  cgroups = yes
                  apps = yes

              # Optimize collection intervals
              [plugin:proc]
                  /proc/net/dev = yes
                  /proc/diskstats = yes
                  /proc/net/sockstat = yes
                  /proc/meminfo = yes
                  /proc/vmstat = yes
                  /proc/stat = yes
                  /proc/loadavg = yes
          register: child_netdata_conf

        - name: Configure Netdata child streaming destination
          ansible.builtin.copy:
            dest: /etc/netdata/stream.conf
            backup: yes
            owner: root
            group: netdata
            mode: '0640'
            content: |
              # Stream all data to parent
              [stream]
                  enabled = yes
                  destination = {{ netdata_parent_host }}:19999
                  api key = {{ netdata_stream_api_key }}
                  timeout seconds = 60
                  buffer size bytes = 1048576
                  reconnect delay seconds = 5
                  initial clock resync iterations = 60
                  # Send everything to parent
                  send charts matching = *
          register: child_stream_conf

        - name: Restart Netdata child
          ansible.builtin.systemd:
            name: netdata
            state: "{{ 'restarted' if (child_netdata_conf is changed or child_stream_conf is changed) else 'started' }}"
# Verification tasks. All four carry the `verify` tag so that
# `ansible-playbook ... --tags verify` (the invocation documented in the README)
# runs exactly this group; previously no task had that tag and the command ran
# nothing. They are tagged together because the two display tasks read the
# results registered by the task directly before them.
#
# The result lookups fall back to a default value so the display tasks still
# print a message when a module returns no `status` field.
# The Jinja control tags stay at the left edge of each message so they add no
# stray indentation to the printed text.
    - name: Verify Netdata service status
      ansible.builtin.systemd:
        name: netdata
        state: started
        enabled: yes
      register: netdata_status
      tags: [verify]

    - name: Display Netdata status
      ansible.builtin.debug:
        msg: |
          Netdata {{ 'parent' if inventory_hostname == 'fedora' else 'child' }} configured on {{ inventory_hostname }}
          Status: {{ netdata_status.status.ActiveState | default('unknown') }}
          {% if inventory_hostname == 'fedora' %}
          Parent dashboard: http://{{ ansible_host }}:19999
          {% else %}
          Streaming to: {{ netdata_parent_host }}:19999
          {% endif %}
      tags: [verify]

    # Children only: an HTTP GET against the parent's info endpoint. Failure is
    # tolerated so the result can be reported by the next task.
    - name: Test connectivity to parent (from children)
      ansible.builtin.uri:
        url: "http://{{ netdata_parent_host }}:19999/api/v1/info"
        method: GET
        timeout: 10
      register: parent_test
      ignore_errors: yes
      when: inventory_hostname != 'fedora'
      tags: [verify]

    - name: Display connectivity test results
      ansible.builtin.debug:
        msg: |
          Connection to parent: {{ 'SUCCESS' if (parent_test.status | default(-1)) == 200 else 'FAILED' }}
          {% if (parent_test.status | default(-1)) != 200 %}
          Error: {{ parent_test.msg | default('Unknown error') }}
          {% endif %}
      when: inventory_hostname != 'fedora' and parent_test is defined
      tags: [verify]
# Handler that restarts the netdata service when a task notifies "restart netdata".
# NOTE(review): no task visible in this play notifies it - confirm whether it is still needed.
  handlers:
    - name: restart netdata
      ansible.builtin.systemd:
        state: restarted
        name: netdata