Major accomplishments:
- ✅ SELinux policy installed and working
- ✅ Core Traefik v2.10 deployment running
- ✅ Production configuration ready (v3.1)
- ✅ Monitoring stack configured
- ✅ Comprehensive documentation created
- ✅ Security hardening implemented

Current status:
- 🟡 Partially deployed (60% complete)
- ⚠️ Docker socket access needs resolution
- ❌ Monitoring stack not deployed yet
- ⚠️ Production migration pending

Next steps:
1. Fix Docker socket permissions
2. Deploy monitoring stack
3. Migrate to production config
4. Validate full functionality

Files added:
- Complete Traefik deployment documentation
- Production and test configurations
- Monitoring stack configurations
- SELinux policy module
- Security checklists and guides
- Current status documentation
346 lines
9.6 KiB
YAML
346 lines
9.6 KiB
YAML
version: '3.9'
|
|
|
|
services:
|
|
# Falco - Runtime security monitoring
#
# Runs one task per node (deploy.mode: global). Events are emitted as JSON,
# appended to /var/log/falco/falco.log on the shared falco_logs volume and
# POSTed to the falco-sidekick service on the monitoring network.
falco:
  image: falcosecurity/falco:0.36.2
  privileged: true  # Required for kernel monitoring
  environment:
    # NOTE(review): Falco is configured through falco.yaml or `-o` overrides;
    # these two variables appear to be inert - confirm. A TCP gRPC bind
    # address would additionally need TLS material that is not mounted here.
    - FALCO_GRPC_ENABLED=true
    - FALCO_GRPC_BIND_ADDRESS=0.0.0.0:5060
  volumes:
    # Host mounts follow the `docker run` invocation from the Falco docs;
    # /dev and /boot are part of that set (driver device node, kernel config).
    - /var/run/docker.sock:/host/var/run/docker.sock:ro
    - /dev:/host/dev
    - /proc:/host/proc:ro
    - /boot:/host/boot:ro
    - /etc:/host/etc:ro
    - /lib/modules:/host/lib/modules:ro
    - /usr:/host/usr:ro
    - falco_rules:/etc/falco/rules.d
    - falco_logs:/var/log/falco
  networks:
    - monitoring-network
  ports:
    - "5060:5060"  # gRPC API
  command:
    - /usr/bin/falco
    # NOTE(review): the containerd socket is not mounted into this
    # container - confirm whether CRI metadata is actually wanted.
    - --cri
    - /run/containerd/containerd.sock
    # The former `--k8s-api` / `--k8s-api-cert` pair was removed: `--k8s-api`
    # expects a URL, so it consumed the following flag as its value, and the
    # referenced certificate was never mounted.
    - -o
    - json_output=true
    # Persist events where the metrics exporter reads them.
    - -o
    - file_output.enabled=true
    - -o
    - file_output.keep_alive=false
    - -o
    - file_output.filename=/var/log/falco/falco.log
    # Forward events to Falco Sidekick.
    - -o
    - http_output.enabled=true
    - -o
    - http_output.url=http://falco-sidekick:2801/
  healthcheck:
    # Probe Falco's embedded web server (enabled on 8765 in the stock
    # falco.yaml). The previous check tested a gRPC unix socket that is only
    # created when gRPC is enabled in Falco's own configuration.
    test: ["CMD", "curl", "-fsS", "http://localhost:8765/healthz"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 60s
  deploy:
    mode: global  # Deploy on all nodes
    resources:
      limits:
        memory: 512M
        cpus: '0.5'
      reservations:
        memory: 256M
        cpus: '0.1'
|
|
|
|
# Falco Sidekick - Events processing and forwarding
#
# Listens on 2801 (published to the host) and is pinned to nodes labelled
# role=monitor. Slack delivery is driven by SLACK_WEBHOOK_URL from the
# deploying environment; it defaults to an empty string.
falco-sidekick:
  image: falcosecurity/falcosidekick:2.28.0
  depends_on:
    - falco
  networks:
    - monitoring-network
  ports:
    - "2801:2801"
  volumes:
    - falco_sidekick_config:/etc/falcosidekick
  environment:
    WEBUI_URL: "http://falco-sidekick-ui:2802"
    PROMETHEUS_URL: "http://prometheus:9090"
    SLACK_WEBHOOKURL: "${SLACK_WEBHOOK_URL:-}"
    SLACK_CHANNEL: "#security-alerts"
    SLACK_USERNAME: "Falco"
  healthcheck:
    # Single non-verbose request against the /ping endpoint.
    test:
      - CMD
      - wget
      - --no-verbose
      - --tries=1
      - --spider
      - http://localhost:2801/ping
    interval: 30s
    timeout: 10s
    retries: 3
  deploy:
    placement:
      constraints:
        - "node.labels.role==monitor"
    resources:
      reservations:
        memory: 128M
        cpus: '0.05'
      limits:
        memory: 256M
        cpus: '0.25'
|
|
|
|
# Falco Sidekick UI - Web interface for security events
#
# Served through Traefik at https://security.localhost (entrypoint
# `websecure`), backed by the Redis instance reachable as `redis_master`.
falco-sidekick-ui:
  image: falcosecurity/falcosidekick-ui:v2.2.0
  environment:
    # The UI expects a bare host:port here, not a redis:// URL.
    - FALCOSIDEKICK_UI_REDIS_URL=redis_master:6379
  networks:
    - monitoring-network
    - traefik-public
    - database-network
  healthcheck:
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:2802/"]
    interval: 30s
    timeout: 10s
    retries: 3
  # Container-level labels are read by Traefik's plain Docker provider; the
  # same list is aliased under deploy.labels below for the Swarm provider,
  # which only inspects service-level labels.
  labels: &falco_ui_traefik_labels
    - traefik.enable=true
    # The service is attached to three networks; tell Traefik which one to
    # use to reach the container instead of letting it pick one.
    - traefik.docker.network=traefik-public
    - traefik.http.routers.falco-ui.rule=Host(`security.localhost`)
    - traefik.http.routers.falco-ui.entrypoints=websecure
    - traefik.http.routers.falco-ui.tls=true
    - traefik.http.services.falco-ui.loadbalancer.server.port=2802
  deploy:
    labels: *falco_ui_traefik_labels
    resources:
      limits:
        memory: 256M
        cpus: '0.25'
      reservations:
        memory: 128M
        cpus: '0.05'
    placement:
      constraints:
        - "node.labels.role==monitor"
  depends_on:
    - falco-sidekick
|
|
|
|
# Suricata - Network intrusion detection
#
# Shares the host network namespace and captures on the `any` pseudo
# interface; one task per node (deploy.mode: global).
suricata:
  image: jasonish/suricata:7.0.2
  network_mode: host
  cap_add:
    - NET_ADMIN
    - SYS_NICE
  environment:
    SURICATA_OPTIONS: "-i any"
  command:
    - /usr/bin/suricata
    - -c
    - /etc/suricata/suricata.yaml
    - -i
    - any
  volumes:
    - suricata_config:/etc/suricata
    - suricata_logs:/var/log/suricata
    - suricata_rules:/var/lib/suricata/rules
  healthcheck:
    # Healthy once the PID file exists.
    # NOTE(review): the command above passes neither -D nor --pidfile -
    # confirm that this file is actually written in this mode.
    test:
      - CMD
      - test
      - -f
      - /var/run/suricata.pid
    interval: 60s
    timeout: 10s
    retries: 3
    start_period: 120s
  deploy:
    mode: global
    resources:
      reservations:
        memory: 512M
        cpus: '0.1'
      limits:
        memory: 1G
        cpus: '0.5'
|
|
|
|
# Trivy - Vulnerability scanner
#
# Runs `trivy server` on 8080 in the background and, once every 24 hours,
# scans up to 20 local images, writing one JSON report per image and day
# into the trivy_reports volume.
trivy-scanner:
  image: aquasec/trivy:0.48.3
  environment:
    - TRIVY_LISTEN=0.0.0.0:8080
    - TRIVY_CACHE_DIR=/tmp/trivy
  volumes:
    - /var/run/docker.sock:/var/run/docker.sock:ro
    - trivy_cache:/tmp/trivy
    - trivy_reports:/reports
  networks:
    - monitoring-network
  # The image's ENTRYPOINT is the trivy binary, so a bare `command: sh -c ...`
  # would be handed to trivy as arguments. Override it to run the script.
  entrypoint: ["/bin/sh", "-c"]
  command:
    # `$$` is Compose's escape for a literal `$`; every shell expansion in
    # the script is written that way so Compose does not try to interpolate.
    #
    # NOTE(review): the loop relies on a `docker` CLI being available inside
    # the container - confirm it is, or install it (e.g. apk add docker-cli).
    - |
      # Start Trivy server
      trivy server --listen 0.0.0.0:8080 &

      # Automated scanning loop
      while true; do
        echo "[$$(date)] Starting vulnerability scan..."

        # Scan all running images
        docker images --format '{{.Repository}}:{{.Tag}}' |
          grep -v '<none>' |
          head -20 |
          while read -r image; do
            echo "Scanning: $$image"
            trivy image --format json \
              --output "/reports/scan-$$(echo "$$image" | tr '/:' '_')-$$(date +%Y%m%d).json" \
              "$$image" || true
          done

        # Wait 24 hours before next scan
        sleep 86400
      done
  healthcheck:
    # /healthz is the server's liveness endpoint.
    test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/healthz"]
    interval: 60s
    timeout: 15s
    retries: 3
    start_period: 60s
  deploy:
    resources:
      limits:
        memory: 2G
        cpus: '1.0'
      reservations:
        memory: 1G
        cpus: '0.25'
    placement:
      constraints:
        - "node.labels.role==monitor"
|
|
|
|
# ClamAV - Antivirus scanning
#
# Runs both clamd and freshclam (neither is disabled below). Docker's volume
# directory is mounted read-only at /scan so it can be scanned from inside
# the container.
clamav:
  image: clamav/clamav:1.2.1
  volumes:
    - clamav_db:/var/lib/clamav
    - clamav_logs:/var/log/clamav
    - /var/lib/docker/volumes:/scan:ro  # Mount volumes for scanning
  networks:
    - monitoring-network
  environment:
    - CLAMAV_NO_CLAMD=false
    - CLAMAV_NO_FRESHCLAMD=false
  healthcheck:
    # Use the daemon check script shipped in the official image.
    # `clamdscan --version` only prints a version string and exits 0 even
    # when clamd is not running, so it cannot detect a dead daemon.
    test: ["CMD", "clamdcheck.sh"]
    interval: 300s
    timeout: 30s
    retries: 3
    start_period: 300s  # Allow time for signature updates
  deploy:
    resources:
      limits:
        memory: 2G
        cpus: '1.0'
      reservations:
        memory: 1G
        cpus: '0.25'
    placement:
      constraints:
        - "node.labels.role==monitor"
|
|
|
|
# Security metrics exporter
#
# Installs Python at container start, writes a small collector script to
# /app and serves Prometheus metrics on 8888. The collector reads the Falco
# log and the Trivy JSON reports from read-only volumes once a minute.
security-metrics-exporter:
  image: alpine:3.18
  # List form: the script reaches `sh -c` as a single argument, so no
  # shell-word splitting touches the backslashes and quotes inside it.
  command:
    - sh
    - -c
    - |
      set -e
      apk add --no-cache curl jq python3 py3-pip
      pip3 install prometheus_client requests

      # /app does not exist in the base image
      mkdir -p /app

      # Create metrics collection script
      cat > /app/security_metrics.py << 'PYEOF'
      import json
      import os
      import time

      from prometheus_client import start_http_server, Gauge, Counter

      FALCO_LOG = '/var/log/falco/falco.log'
      REPORTS_DIR = '/reports'

      # Prometheus metrics
      falco_alerts = Counter('falco_security_alerts_total', 'Total Falco security alerts', ['rule', 'priority'])
      vuln_count = Gauge('trivy_vulnerabilities_total', 'Total vulnerabilities found', ['severity', 'image'])
      clamav_threats = Counter('clamav_threats_total', 'Total threats detected by ClamAV')
      suricata_alerts = Counter('suricata_network_alerts_total', 'Total network alerts from Suricata')

      # Number of bytes of the Falco log that have already been counted.
      falco_offset = 0


      def collect_falco_metrics():
          """Count 'Alert' lines appended to the Falco log since the last pass."""
          global falco_offset
          try:
              if not os.path.exists(FALCO_LOG):
                  return
              # A file shorter than the saved offset was truncated or rotated.
              if os.path.getsize(FALCO_LOG) < falco_offset:
                  falco_offset = 0
              with open(FALCO_LOG, 'rb') as log:
                  log.seek(falco_offset)
                  for raw in log:
                      # Leave a partially written last line for the next pass.
                      if not raw.endswith(b'\n'):
                          break
                      falco_offset += len(raw)
                      if b'Alert' in raw:
                          falco_alerts.labels(rule='unknown', priority='info').inc()
          except Exception as e:
              print(f'Error collecting Falco metrics: {e}')


      def collect_trivy_metrics():
          """Publish per-image vulnerability counts from the newest report of each image."""
          try:
              if not os.path.isdir(REPORTS_DIR):
                  return
              paths = [os.path.join(REPORTS_DIR, name)
                       for name in os.listdir(REPORTS_DIR)
                       if name.endswith('.json')]
              # Oldest first, so a newer report for the same image overwrites
              # the counts taken from an older one.
              latest = {}
              for path in sorted(paths, key=os.path.getmtime):
                  try:
                      with open(path) as f:
                          data = json.load(f)
                  except (OSError, ValueError) as e:
                      # One truncated or corrupt report must not hide the rest.
                      print(f'Skipping unreadable Trivy report {path}: {e}')
                      continue
                  if not isinstance(data, dict):
                      continue
                  image = data.get('ArtifactName', 'unknown')
                  counts = {}
                  for result in data.get('Results') or []:
                      for vuln in result.get('Vulnerabilities') or []:
                          severity = vuln.get('Severity', 'unknown').lower()
                          counts[severity] = counts.get(severity, 0) + 1
                  latest[image] = counts
              # A gauge reports the current state: drop stale series and set
              # absolute values instead of incrementing on every pass.
              vuln_count.clear()
              for image, counts in latest.items():
                  for severity, total in counts.items():
                      vuln_count.labels(severity=severity, image=image).set(total)
          except Exception as e:
              print(f'Error collecting Trivy metrics: {e}')


      # Start metrics server
      start_http_server(8888)
      print('Security metrics server started on port 8888')

      # Collection loop
      while True:
          collect_falco_metrics()
          collect_trivy_metrics()
          time.sleep(60)
      PYEOF

      exec python3 /app/security_metrics.py
  volumes:
    - falco_logs:/var/log/falco:ro
    - trivy_reports:/reports:ro
    - clamav_logs:/var/log/clamav:ro
    - suricata_logs:/var/log/suricata:ro
  networks:
    - monitoring-network
  ports:
    - "8888:8888"  # Prometheus metrics endpoint
  deploy:
    resources:
      limits:
        memory: 256M
        cpus: '0.25'
      reservations:
        memory: 128M
        cpus: '0.05'
    placement:
      constraints:
        - "node.labels.role==monitor"
|
|
|
|
# Named volumes used by the services in this stack, all on the `local` driver.
volumes:
  # ClamAV
  clamav_db:
    driver: local
  clamav_logs:
    driver: local

  # Falco and Falco Sidekick
  falco_logs:
    driver: local
  falco_rules:
    driver: local
  falco_sidekick_config:
    driver: local

  # Suricata
  # suricata_config is a bind mount of a directory on the host rather than a
  # Docker-managed volume, so the path below must exist on the node.
  suricata_config:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: /home/jonathan/Coding/HomeAudit/stacks/monitoring/suricata-config
  suricata_logs:
    driver: local
  suricata_rules:
    driver: local

  # Trivy
  trivy_cache:
    driver: local
  trivy_reports:
    driver: local
|
|
|
|
# Networks referenced by the services above. All three are declared
# external: they must already exist, and this stack does not create them.
networks:
  database-network:
    external: true
  monitoring-network:
    external: true
  traefik-public:
    external: true