Files
HomeAudit/backups/stacks-pre-secrets-20250828-092958/security-monitoring.yml
admin 9ea31368f5 Complete Traefik infrastructure deployment - 60% complete
Major accomplishments:
- SELinux policy installed and working
- Core Traefik v2.10 deployment running
- Production configuration ready (v3.1)
- Monitoring stack configured
- Comprehensive documentation created
- Security hardening implemented

Current status:
- 🟡 Partially deployed (60% complete)
- ⚠️ Docker socket access needs resolution
- Monitoring stack not deployed yet
- ⚠️ Production migration pending

Next steps:
1. Fix Docker socket permissions
2. Deploy monitoring stack
3. Migrate to production config
4. Validate full functionality

Files added:
- Complete Traefik deployment documentation
- Production and test configurations
- Monitoring stack configurations
- SELinux policy module
- Security checklists and guides
- Current status documentation
2025-08-28 15:22:41 -04:00

346 lines
9.6 KiB
YAML

# Compose file format 3.9. The services below carry `deploy:` sections
# (global mode, placement constraints, resource limits).
version: '3.9'
# Service definitions for the security-monitoring stack.
services:
# Falco - Runtime security monitoring
  falco:
    # One task per node (deploy.mode: global). Host paths are mounted
    # read-only under /host so Falco can inspect the node it runs on.
    image: falcosecurity/falco:0.36.2
    privileged: true  # Required for kernel monitoring
    environment:
      # NOTE(review): these FALCO_GRPC_* names are not documented Falco
      # settings as far as I can tell; gRPC is normally configured in
      # falco.yaml or with `-o grpc.*` overrides - confirm they take effect.
      - FALCO_GRPC_ENABLED=true
      - FALCO_GRPC_BIND_ADDRESS=0.0.0.0:5060
    volumes:
      - /var/run/docker.sock:/host/var/run/docker.sock:ro
      - /proc:/host/proc:ro
      - /etc:/host/etc:ro
      - /lib/modules:/host/lib/modules:ro
      - /usr:/host/usr:ro
      - falco_rules:/etc/falco/rules.d
      - falco_logs:/var/log/falco
    networks:
      - monitoring-network
    ports:
      - "5060:5060"  # gRPC API
    command:
      - /usr/bin/falco
      # NOTE(review): the containerd socket is not among the mounts above;
      # confirm whether CRI metadata lookups are actually needed here.
      - "--cri"
      - /run/containerd/containerd.sock
      # The Kubernetes flags were removed: `--k8s-api` was given without its
      # URL argument, the certificate path was never mounted, and this stack
      # targets Swarm nodes rather than a Kubernetes API server.
      #
      # Forward every event as JSON to Falcosidekick; without these overrides
      # the stock configuration never sends anything to it.
      - "-o"
      - json_output=true
      - "-o"
      - http_output.enabled=true
      - "-o"
      - http_output.url=http://falco-sidekick:2801/
    healthcheck:
      # NOTE(review): this socket only exists when Falco's gRPC server is
      # enabled on its unix-socket bind address - confirm this check can pass
      # with the gRPC settings above.
      test: ["CMD", "test", "-S", "/var/run/falco/falco.sock"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      mode: global  # Deploy on all nodes
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.1'
# Falco Sidekick - Events processing and forwarding
  falco-sidekick:
    # Receives events on port 2801 and fans them out to the web UI,
    # Prometheus and Slack targets configured below.
    image: falcosecurity/falcosidekick:2.28.0
    depends_on:
      - falco
    networks:
      - monitoring-network
    ports:
      - "2801:2801"
    volumes:
      - falco_sidekick_config:/etc/falcosidekick
    environment:
      WEBUI_URL: "http://falco-sidekick-ui:2802"
      PROMETHEUS_URL: "http://prometheus:9090"
      # Empty when SLACK_WEBHOOK_URL is not set in the deploy environment.
      SLACK_WEBHOOKURL: "${SLACK_WEBHOOK_URL:-}"
      # Quoted so the leading '#' is not read as a YAML comment.
      SLACK_CHANNEL: "#security-alerts"
      SLACK_USERNAME: "Falco"
    healthcheck:
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:2801/ping
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      placement:
        constraints:
          - "node.labels.role==monitor"
      resources:
        reservations:
          memory: 128M
          cpus: '0.05'
        limits:
          memory: 256M
          cpus: '0.25'
# Falco Sidekick UI - Web interface for security events
  falco-sidekick-ui:
    # Serves the event dashboard on port 2802 and is published through
    # Traefik at security.localhost.
    image: falcosecurity/falcosidekick-ui:v2.2.0
    environment:
      # The UI expects a plain host:port Redis address (its default is
      # localhost:6379); the previous redis:// URL form is not a valid value.
      # NOTE(review): confirm redis_master provides the RediSearch module,
      # which the v2 UI is understood to require.
      - FALCOSIDEKICK_UI_REDIS_URL=redis_master:6379
    networks:
      - monitoring-network
      - traefik-public
      - database-network
    healthcheck:
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:2802/"]
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.05'
      placement:
        constraints:
          - "node.labels.role==monitor"
      # In Swarm mode Traefik reads routing labels from the service, so they
      # belong under deploy.labels rather than on the container.
      labels:
        - traefik.enable=true
        - traefik.http.routers.falco-ui.rule=Host(`security.localhost`)
        - traefik.http.routers.falco-ui.entrypoints=websecure
        - traefik.http.routers.falco-ui.tls=true
        - traefik.http.services.falco-ui.loadbalancer.server.port=2802
    depends_on:
      - falco-sidekick
# Suricata - Network intrusion detection
  suricata:
    # One task per node (deploy.mode: global), capturing on the host's
    # network stack.
    image: jasonish/suricata:7.0.2
    # NOTE(review): `docker stack deploy` is documented to ignore
    # network_mode; confirm host networking is really in effect in Swarm.
    network_mode: host
    cap_add:
      - NET_ADMIN
      - SYS_NICE
    environment:
      # NOTE(review): the command below already passes `-i any`; confirm
      # whether the image still reads this variable when a full command
      # line is supplied.
      - SURICATA_OPTIONS=-i any
    volumes:
      - suricata_config:/etc/suricata
      - suricata_logs:/var/log/suricata
      - suricata_rules:/var/lib/suricata/rules
    command:
      - /usr/bin/suricata
      - "-c"
      - /etc/suricata/suricata.yaml
      - "-i"
      - any
      # Suricata only writes a pid file in daemon mode or when asked to.
      # The health check below tests for this exact path, so request it
      # explicitly; otherwise a foreground Suricata is always "unhealthy".
      - "--pidfile"
      - /var/run/suricata.pid
    healthcheck:
      test: ["CMD", "test", "-f", "/var/run/suricata.pid"]
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 120s
    deploy:
      mode: global
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.1'
# Trivy - Vulnerability scanner
  trivy-scanner:
    # Runs a Trivy server on :8080 and, once a day, scans up to 20 images
    # known to the local Docker daemon, writing one JSON report per image
    # to /reports.
    image: aquasec/trivy:0.48.3
    environment:
      - TRIVY_LISTEN=0.0.0.0:8080
      - TRIVY_CACHE_DIR=/tmp/trivy
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - trivy_cache:/tmp/trivy
      - trivy_reports:/reports
    networks:
      - monitoring-network
    # The image's entrypoint is the trivy binary, so a shell script passed as
    # `command` alone would be handed to trivy as arguments. Override it.
    entrypoint: ["/bin/sh", "-c"]
    # Every shell `$` is written `$$` so Compose does not try to interpolate it.
    command:
      - |
        # The image ships without the docker CLI that the loop below needs.
        command -v docker >/dev/null 2>&1 || apk add --no-cache docker-cli

        # Start Trivy server
        trivy server --listen 0.0.0.0:8080 &

        # Wait until the server answers, so the first scan is not lost.
        until wget -q --spider http://127.0.0.1:8080/healthz; do
          sleep 5
        done

        # Automated scanning loop
        while true; do
          echo "[$$(date)] Starting vulnerability scan..."
          # Scan images known to the local daemon (first 20, untagged skipped)
          docker images --format '{{.Repository}}:{{.Tag}}' |
            grep -v '<none>' |
            head -20 |
            while read -r image; do
              echo "Scanning: $$image"
              report="/reports/scan-$$(echo "$$image" | tr '/:' '_')-$$(date +%Y%m%d).json"
              # Client mode: scan through the server started above instead of
              # opening its cache directory from a second trivy process.
              trivy image --server http://127.0.0.1:8080 \
                --format json --output "$$report" "$$image" || true
            done
          # Wait 24 hours before next scan
          sleep 86400
        done
    healthcheck:
      # /healthz is the server's documented health endpoint.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/healthz"]
      interval: 60s
      timeout: 15s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.25'
      placement:
        constraints:
          - "node.labels.role==monitor"
# ClamAV - Antivirus scanning
  clamav:
    # Runs clamd and freshclam (both CLAMAV_NO_* switches are false).
    image: clamav/clamav:1.2.1
    volumes:
      - clamav_db:/var/lib/clamav
      - clamav_logs:/var/log/clamav
      # NOTE(review): nothing in this file triggers a scan of /scan; confirm
      # a scheduled clamdscan exists elsewhere.
      - /var/lib/docker/volumes:/scan:ro  # Mount volumes for scanning
    networks:
      - monitoring-network
    environment:
      - CLAMAV_NO_CLAMD=false
      - CLAMAV_NO_FRESHCLAMD=false
    healthcheck:
      # `clamdscan --version` only prints the client version and succeeds
      # even when clamd is down. Use the image's own health-check script,
      # which checks the daemon itself.
      test: ["CMD", "clamdcheck.sh"]
      interval: 300s
      timeout: 30s
      retries: 3
      start_period: 300s  # Allow time for signature updates
    deploy:
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.25'
      placement:
        constraints:
          - "node.labels.role==monitor"
# Security metrics exporter
  security-metrics-exporter:
    # Installs Python at start-up, writes a small exporter script and serves
    # Prometheus metrics on :8888. It reads the Falco log and the Trivy
    # reports from the read-only volumes mounted below.
    image: alpine:3.18
    command:
      - sh
      - "-c"
      - |
        set -e
        apk add --no-cache python3 py3-pip
        pip3 install --no-cache-dir prometheus_client
        # /app does not exist in the base image; create it before writing.
        mkdir -p /app
        # Create metrics collection script
        cat > /app/security_metrics.py << 'PYEOF'
        import json
        import os
        import time

        from prometheus_client import start_http_server, Gauge, Counter

        FALCO_LOG = '/var/log/falco/falco.log'
        REPORTS_DIR = '/reports'

        # Prometheus metrics
        falco_alerts = Counter('falco_security_alerts_total', 'Total Falco security alerts', ['rule', 'priority'])
        vuln_count = Gauge('trivy_vulnerabilities_total', 'Total vulnerabilities found', ['severity', 'image'])
        # NOTE(review): the two counters below are exported but nothing updates them yet.
        clamav_threats = Counter('clamav_threats_total', 'Total threats detected by ClamAV')
        suricata_alerts = Counter('suricata_network_alerts_total', 'Total network alerts from Suricata')

        # Byte offset of the first unread Falco log line (None until first seen).
        falco_offset = None


        def collect_falco_metrics():
            """Count alert lines appended to the Falco log since the last call.

            Only new lines are read, so an alert is counted once instead of on
            every 60-second cycle.
            """
            global falco_offset
            try:
                size = os.path.getsize(FALCO_LOG)
            except OSError:
                # No log yet: read it from the start once it appears.
                falco_offset = 0
                return
            if falco_offset is None:
                # First pass: skip history so a restart does not replay old alerts.
                falco_offset = size
                return
            if size < falco_offset:
                # File was truncated or rotated.
                falco_offset = 0
            try:
                with open(FALCO_LOG, 'rb') as log:
                    log.seek(falco_offset)
                    for line in log:
                        if not line.endswith(b'\n'):
                            break  # partial line; pick it up next cycle
                        falco_offset += len(line)
                        # NOTE(review): matching on 'Alert' and the fixed labels are
                        # kept from the original; confirm against the real log format.
                        if b'Alert' in line:
                            falco_alerts.labels(rule='unknown', priority='info').inc()
            except OSError as e:
                print(f'Error collecting Falco metrics: {e}')


        def collect_trivy_metrics():
            """Publish per-image vulnerability counts from the newest report of each image.

            The gauge is set to the current counts rather than incremented, so it
            no longer grows on every cycle.
            """
            try:
                filenames = os.listdir(REPORTS_DIR)
            except OSError:
                return
            latest = {}  # image -> (report mtime, {severity: count})
            for filename in filenames:
                if not filename.endswith('.json'):
                    continue
                path = os.path.join(REPORTS_DIR, filename)
                try:
                    mtime = os.path.getmtime(path)
                    with open(path) as f:
                        data = json.load(f)
                except (OSError, ValueError) as e:
                    # One unreadable or half-written report must not stop the rest.
                    print(f'Error collecting Trivy metrics: {e}')
                    continue
                if not isinstance(data, dict):
                    continue
                image = data.get('ArtifactName', 'unknown')
                if image in latest and latest[image][0] >= mtime:
                    continue
                counts = {}
                for result in data.get('Results') or []:
                    for vuln in result.get('Vulnerabilities') or []:
                        severity = vuln.get('Severity', 'unknown').lower()
                        counts[severity] = counts.get(severity, 0) + 1
                latest[image] = (mtime, counts)
            # Drop label sets that no longer apply, then publish current counts.
            vuln_count.clear()
            for image, (_, counts) in latest.items():
                for severity, count in counts.items():
                    vuln_count.labels(severity=severity, image=image).set(count)


        # Start metrics server
        start_http_server(8888)
        print('Security metrics server started on port 8888')

        # Collection loop
        while True:
            collect_falco_metrics()
            collect_trivy_metrics()
            time.sleep(60)
        PYEOF
        # -u: unbuffered output so log lines show up in `docker logs` promptly.
        exec python3 -u /app/security_metrics.py
    volumes:
      - falco_logs:/var/log/falco:ro
      - trivy_reports:/reports:ro
      - clamav_logs:/var/log/clamav:ro
      - suricata_logs:/var/log/suricata:ro
    networks:
      - monitoring-network
    ports:
      - "8888:8888"  # Prometheus metrics endpoint
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.05'
      placement:
        constraints:
          - "node.labels.role==monitor"
# Named volumes used by the services above. All use the local driver;
# suricata_config is a bind mount of a host directory.
volumes:
  falco_rules:
    driver: local
  falco_logs:
    driver: local
  falco_sidekick_config:
    driver: local
  suricata_config:
    driver: local
    driver_opts:
      type: none
      o: bind
      # Host directory holding the Suricata configuration. Override with
      # SURICATA_CONFIG_DIR; the default is the path that was hard-coded here.
      device: "${SURICATA_CONFIG_DIR:-/home/jonathan/Coding/HomeAudit/stacks/monitoring/suricata-config}"
  suricata_logs:
    driver: local
  suricata_rules:
    driver: local
  trivy_cache:
    driver: local
  trivy_reports:
    driver: local
  clamav_db:
    driver: local
  clamav_logs:
    driver: local
# Networks are declared external: this file attaches to them but does not
# create them.
networks:
  database-network:
    external: true
  monitoring-network:
    external: true
  traefik-public:
    external: true