Files
HomeAudit/playbooks/netdata-parent-child.yml
2025-08-24 11:13:39 -04:00

198 lines
6.7 KiB
YAML

---
# Configure a Netdata parent/child streaming topology.
#
# The inventory host named by `netdata_parent_inventory_name` becomes the
# parent: it stores metrics with dbengine, listens on *:19999 and accepts
# streams. Every other host in the play becomes a child that keeps no local
# history and streams to `netdata_parent_host`.
#
# Inputs:
#   netdata_api_key (optional) - streaming API key shared by parent and
#     children. Netdata expects a GUID (e.g. from `uuidgen`); supply it from a
#     vault rather than relying on the fallback below.
- name: Configure Netdata Parent-Child Streaming Architecture
  hosts: all
  become: true
  vars:
    netdata_parent_host: "192.168.50.225"  # Fedora workstation wired IP (more stable)
    # Inventory name of the host that plays the parent role.
    netdata_parent_inventory_name: fedora
    # Netdata rejects API keys that are not GUIDs, so the human-readable
    # fallback seed is turned into a deterministic UUID. The same value is
    # rendered on parent and children because the seed is constant.
    netdata_stream_api_key: "{{ netdata_api_key | default('homelab-stream-key-2024' | to_uuid) }}"
    # Home LAN (192.168.50.0/24) plus the 100.64.0.0/10 range, written as
    # globs: Netdata documents its ACL options as space-separated simple
    # patterns, and 100.64.0.0/10 is exactly 100.64.* through 100.127.*.
    netdata_allowed_clients: "192.168.50.*{% for octet in range(64, 128) %} 100.{{ octet }}.*{% endfor %}"

  tasks:
    - name: Install Netdata on hosts where it's missing
      when: inventory_hostname in ['surface', 'fedora', 'omvbackup']
      block:
        - name: Install Netdata via package manager (Fedora/RHEL)
          dnf:
            name: netdata
            state: present
          when: ansible_os_family == "RedHat"

        - name: Install Netdata via package manager (Debian/Ubuntu)
          apt:
            name: netdata
            state: present
            update_cache: true
          when: ansible_os_family == "Debian"

        # pipefail makes a failed download fail the task instead of feeding
        # an empty script to bash and reporting success.
        - name: Install Netdata via installer script (fallback)
          shell: |
            set -o pipefail
            curl -L https://get.netdata.cloud/kickstart.sh | bash -s -- --stable-channel --disable-telemetry --dont-wait
          args:
            creates: /usr/sbin/netdata
            executable: /bin/bash
          when: ansible_os_family not in ["RedHat", "Debian"]

    - name: Configure Netdata Parent (Central monitoring node)
      when: inventory_hostname == netdata_parent_inventory_name
      block:
        # NOTE(review): `bind to` / `allow connections from` are documented
        # under [web]; they are left in [global] as the original had them so
        # the effective ACL does not silently tighten - confirm and move.
        - name: Create Netdata parent configuration
          copy:
            content: |
              [global]
                  # Parent node configuration
                  memory mode = dbengine
                  page cache size = 32
                  dbengine disk space = 2048
                  # Network settings
                  bind to = *:19999
                  allow connections from = {{ netdata_allowed_clients }}

              # Disable local data collection plugins to reduce load
              [plugins]
                  apps = no
                  cgroups = no
                  diskspace = no

              [web]
                  # Allow dashboard from any host on home network
                  allow dashboard from = {{ netdata_allowed_clients }}
            dest: /etc/netdata/netdata.conf
            mode: "0644"
            backup: true
          notify: restart netdata

        # Streaming settings are read from stream.conf, not netdata.conf.
        # The file carries the shared API key, so it is not world-readable.
        - name: Create Netdata parent streaming configuration
          copy:
            content: |
              # The parent does not forward metrics to another node
              [stream]
                  enabled = no

              # Accept streams from child nodes
              [{{ netdata_stream_api_key }}]
                  enabled = yes
                  allow from = {{ netdata_allowed_clients }}
                  default history = 3600
                  default memory mode = dbengine
                  health enabled by default = auto
            dest: /etc/netdata/stream.conf
            owner: root
            group: netdata
            mode: "0640"
            backup: true
          notify: restart netdata

        # Only one of firewalld/ufw is expected to exist on the parent, so
        # both tasks are best-effort and must not abort the play.
        - name: Open firewall for Netdata (firewalld)
          firewalld:
            port: 19999/tcp
            permanent: true
            state: enabled
            immediate: true
          ignore_errors: true

        - name: Open firewall for Netdata (ufw)
          ufw:
            rule: allow
            port: '19999'
            proto: tcp
          ignore_errors: true

    - name: Configure Netdata Children (streaming to parent)
      when: inventory_hostname != netdata_parent_inventory_name
      block:
        - name: Configure Netdata child to stream to parent
          copy:
            content: |
              [global]
                  # Child node - minimal local storage
                  memory mode = none
                  # Network settings
                  bind to = localhost:19999

              [web]
                  # Restrict dashboard access to localhost only
                  allow dashboard from = localhost

              [plugins]
                  proc = yes
                  diskspace = yes
                  cgroups = yes
                  apps = yes

              [plugin:proc]
                  /proc/net/dev = yes
                  /proc/diskstats = yes
                  /proc/net/sockstat = yes
                  /proc/meminfo = yes
                  /proc/vmstat = yes
                  /proc/stat = yes
                  /proc/loadavg = yes
            dest: /etc/netdata/netdata.conf
            mode: "0644"
            backup: true
          notify: restart netdata

        # Streaming settings are read from stream.conf, not netdata.conf.
        - name: Configure Netdata child streaming destination
          copy:
            content: |
              # Stream all data to parent
              [stream]
                  enabled = yes
                  destination = {{ netdata_parent_host }}:19999
                  api key = {{ netdata_stream_api_key }}
                  timeout seconds = 60
                  buffer size bytes = 1048576
                  reconnect delay seconds = 5
                  initial clock resync iterations = 60
                  # Send everything to parent
                  send charts matching = *
            dest: /etc/netdata/stream.conf
            owner: root
            group: netdata
            mode: "0640"
            backup: true
          notify: restart netdata

    # Restart (only where configuration changed) before verifying, so the
    # status and connectivity checks below observe the new configuration.
    - name: Apply pending Netdata restarts
      meta: flush_handlers

    - name: Verify Netdata service status
      systemd:
        name: netdata
        state: started
        enabled: true
      register: netdata_status

    - name: Display Netdata status
      debug:
        msg: |
          Netdata {{ 'parent' if inventory_hostname == netdata_parent_inventory_name else 'child' }} configured on {{ inventory_hostname }}
          Status: {{ netdata_status.status.ActiveState }}
          {% if inventory_hostname == netdata_parent_inventory_name %}
          Parent dashboard: http://{{ ansible_host | default(inventory_hostname) }}:19999
          {% else %}
          Streaming to: {{ netdata_parent_host }}:19999
          {% endif %}

    # Informational only: a failure is reported by the next task rather than
    # failing the host.
    - name: Test connectivity to parent (from children)
      uri:
        url: "http://{{ netdata_parent_host }}:19999/api/v1/info"
        method: GET
        timeout: 10
      register: parent_test
      ignore_errors: true
      when: inventory_hostname != netdata_parent_inventory_name

    # `status` is defaulted so a result without an HTTP status (for example a
    # connection error) is reported as FAILED instead of raising a template
    # error.
    - name: Display connectivity test results
      debug:
        msg: |
          Connection to parent: {{ 'SUCCESS' if parent_test.status | default(-1) == 200 else 'FAILED' }}
          {% if parent_test.status | default(-1) != 200 %}
          Error: {{ parent_test.msg | default('Unknown error') }}
          {% endif %}
      when: inventory_hostname != netdata_parent_inventory_name and parent_test is defined

  handlers:
    # Notified by every task that writes netdata.conf or stream.conf.
    - name: restart netdata
      systemd:
        name: netdata
        state: restarted