---
# Security monitoring stack (Docker Compose / Swarm stack file).
#
# Services defined here:
#   falco                     - runtime security monitoring
#   falco-sidekick            - forwards Falco events to outputs (Slack, UI)
#   falco-sidekick-ui         - web interface for security events
#   suricata                  - network intrusion detection
#   trivy-scanner             - image vulnerability scanner
#   clamav                    - antivirus scanning
#   security-metrics-exporter - Prometheus exporter built from log/report files
#
# Variables read from the deploy environment:
#   SLACK_WEBHOOK_URL    - Slack webhook for Falco Sidekick (default: empty)
#   SURICATA_CONFIG_DIR  - host directory bind-mounted as the Suricata config
version: '3.9'

services:
  # Falco - Runtime security monitoring
  falco:
    image: falcosecurity/falco:0.36.2
    privileged: true  # Required for kernel monitoring
    environment:
      - FALCO_GRPC_ENABLED=true
      - FALCO_GRPC_BIND_ADDRESS=0.0.0.0:5060
      # NOTE(review): no volume below provides /etc/ssl/falco.crt - confirm
      # the certificate is baked into the image or mount it explicitly.
      - FALCO_K8S_API_CERT=/etc/ssl/falco.crt
    volumes:
      - /var/run/docker.sock:/host/var/run/docker.sock:ro
      - /proc:/host/proc:ro
      - /etc:/host/etc:ro
      - /lib/modules:/host/lib/modules:ro
      - /usr:/host/usr:ro
      - falco_rules:/etc/falco/rules.d
      - falco_logs:/var/log/falco
    networks:
      - monitoring-network
    ports:
      - "5060:5060"  # gRPC API
    # NOTE(review): the containerd socket passed to --cri is not among the
    # mounts above, so it will not exist inside the container - confirm.
    command:
      - /usr/bin/falco
      - --cri
      - /run/containerd/containerd.sock
      - --k8s-api
      - --k8s-api-cert=/etc/ssl/falco.crt
    healthcheck:
      test: ["CMD", "test", "-S", "/var/run/falco/falco.sock"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    deploy:
      mode: global  # Deploy on all nodes
      resources:
        limits:
          memory: 512M
          cpus: '0.5'
        reservations:
          memory: 256M
          cpus: '0.1'

  # Falco Sidekick - Events processing and forwarding
  falco-sidekick:
    image: falcosecurity/falcosidekick:2.28.0
    environment:
      - WEBUI_URL=http://falco-sidekick-ui:2802
      - PROMETHEUS_URL=http://prometheus:9090
      - SLACK_WEBHOOKURL=${SLACK_WEBHOOK_URL:-}
      # Quoted so the '#' can never be read as the start of a YAML comment.
      - 'SLACK_CHANNEL=#security-alerts'
      - SLACK_USERNAME=Falco
    volumes:
      - falco_sidekick_config:/etc/falcosidekick
    networks:
      - monitoring-network
    ports:
      - "2801:2801"
    healthcheck:
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:2801/ping
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.05'
      placement:
        constraints:
          - "node.labels.role==monitor"
    depends_on:
      - falco

  # Falco Sidekick UI - Web interface for security events
  falco-sidekick-ui:
    image: falcosecurity/falcosidekick-ui:v2.2.0
    environment:
      - FALCOSIDEKICK_UI_REDIS_URL=redis://redis_master:6379
    networks:
      - monitoring-network
      - traefik-public
      - database-network
    healthcheck:
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:2802/
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.05'
      placement:
        constraints:
          - "node.labels.role==monitor"
      # NOTE(review): placed under deploy (Swarm service labels) - confirm
      # this matches where the Traefik provider in use looks for labels.
      labels:
        - traefik.enable=true
        - "traefik.http.routers.falco-ui.rule=Host(`security.localhost`)"
        - traefik.http.routers.falco-ui.entrypoints=websecure
        - traefik.http.routers.falco-ui.tls=true
        - traefik.http.services.falco-ui.loadbalancer.server.port=2802
    depends_on:
      - falco-sidekick

  # Suricata - Network intrusion detection
  suricata:
    image: jasonish/suricata:7.0.2
    network_mode: host
    cap_add:
      - NET_ADMIN
      - SYS_NICE
    environment:
      - SURICATA_OPTIONS=-i any
    volumes:
      - suricata_config:/etc/suricata
      - suricata_logs:/var/log/suricata
      - suricata_rules:/var/lib/suricata/rules
    command:
      - /usr/bin/suricata
      - -c
      - /etc/suricata/suricata.yaml
      - -i
      - any
    healthcheck:
      test: ["CMD", "test", "-f", "/var/run/suricata.pid"]
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 120s
    deploy:
      mode: global
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.1'

  # Trivy - Vulnerability scanner
  trivy-scanner:
    image: aquasec/trivy:0.48.3
    environment:
      - TRIVY_LISTEN=0.0.0.0:8080
      - TRIVY_CACHE_DIR=/tmp/trivy
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - trivy_cache:/tmp/trivy
      - trivy_reports:/reports
    networks:
      - monitoring-network
    # The image's default entrypoint is the trivy binary itself, so the
    # shell script has to replace the entrypoint rather than be passed as
    # arguments to it.
    entrypoint: ["/bin/sh", "-c"]
    # Every shell dollar sign in the script is doubled so that Compose
    # passes it through instead of treating it as variable interpolation.
    # NOTE(review): the script calls the docker CLI, which this image is not
    # known to ship - confirm it is available or list images another way.
    command:
      - |
        # Start Trivy server
        trivy server --listen 0.0.0.0:8080 &

        # Automated scanning loop
        while true; do
          echo "[$$(date)] Starting vulnerability scan..."

          # Scan local images, skipping untagged or dangling <none> entries
          docker images --format '{{.Repository}}:{{.Tag}}' |
            grep -v '<none>' |
            head -20 |
            while read -r image; do
              echo "Scanning: $$image"
              safe_name=$$(echo "$$image" | tr '/:' '_')
              report="/reports/scan-$${safe_name}-$$(date +%Y%m%d).json"
              trivy image --format json --output "$$report" "$$image" || true
            done

          # Wait 24 hours before next scan
          sleep 86400
        done
    healthcheck:
      test:
        - CMD
        - wget
        - --no-verbose
        - --tries=1
        - --spider
        - http://localhost:8080/version
      interval: 60s
      timeout: 15s
      retries: 3
      start_period: 60s
    deploy:
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.25'
      placement:
        constraints:
          - "node.labels.role==monitor"

  # ClamAV - Antivirus scanning
  clamav:
    image: clamav/clamav:1.2.1
    volumes:
      - clamav_db:/var/lib/clamav
      - clamav_logs:/var/log/clamav
      - /var/lib/docker/volumes:/scan:ro  # Mount volumes for scanning
    networks:
      - monitoring-network
    environment:
      - CLAMAV_NO_CLAMD=false
      - CLAMAV_NO_FRESHCLAMD=false
    healthcheck:
      # NOTE(review): printing the version presumably succeeds even when the
      # clamd daemon is down - consider a check that contacts the daemon.
      test: ["CMD", "clamdscan", "--version"]
      interval: 300s
      timeout: 30s
      retries: 3
      start_period: 300s  # Allow time for signature updates
    deploy:
      resources:
        limits:
          memory: 2G
          cpus: '1.0'
        reservations:
          memory: 1G
          cpus: '0.25'
      placement:
        constraints:
          - "node.labels.role==monitor"

  # Security metrics exporter
  security-metrics-exporter:
    image: alpine:3.18
    # Installs Python at start-up, writes the exporter script to /app and
    # runs it. The heredoc body and its PYEOF terminator sit at the block
    # scalar's base indentation so they reach the shell at column 0.
    command:
      - sh
      - -c
      - |
        set -e
        apk add --no-cache curl jq python3 py3-pip
        pip3 install prometheus_client requests

        # Create metrics collection script
        mkdir -p /app
        cat > /app/security_metrics.py << 'PYEOF'
        import json
        import os
        import time
        from collections import Counter as Tally

        from prometheus_client import start_http_server, Gauge, Counter

        FALCO_LOG = '/var/log/falco/falco.log'
        REPORTS_DIR = '/reports'

        # Prometheus metrics
        falco_alerts = Counter('falco_security_alerts_total', 'Total Falco security alerts', ['rule', 'priority'])
        vuln_count = Gauge('trivy_vulnerabilities_total', 'Total vulnerabilities found', ['severity', 'image'])
        clamav_threats = Counter('clamav_threats_total', 'Total threats detected by ClamAV')
        suricata_alerts = Counter('suricata_network_alerts_total', 'Total network alerts from Suricata')

        # Bytes of the Falco log already counted; None until the first poll.
        falco_offset = None

        def collect_falco_metrics():
            # Count only lines appended since the previous poll so that an
            # alert is never added to the counter more than once.
            global falco_offset
            try:
                size = os.path.getsize(FALCO_LOG)
                if falco_offset is None:
                    # First poll: ignore history, count alerts from now on.
                    falco_offset = size
                elif size < falco_offset:
                    # File shrank (rotated or truncated): start over.
                    falco_offset = 0
                with open(FALCO_LOG, 'rb') as f:
                    f.seek(falco_offset)
                    chunk = f.read()
                # Leave a trailing partial line for the next poll.
                complete = chunk[:chunk.rfind(b'\n') + 1]
                falco_offset += len(complete)
                for line in complete.decode('utf-8', 'replace').splitlines():
                    if 'Alert' in line:
                        # Rule and priority are not parsed from the line yet.
                        falco_alerts.labels(rule='unknown', priority='info').inc()
            except FileNotFoundError:
                pass  # Log file has not been created yet.
            except Exception as e:
                print(f'Error collecting Falco metrics: {e}')

        def collect_trivy_metrics():
            # Publish, per image, the severity counts of its newest report.
            # The gauge is set rather than incremented, so repeated polls
            # of the same reports do not inflate the values.
            try:
                if not os.path.isdir(REPORTS_DIR):
                    return
                paths = [os.path.join(REPORTS_DIR, name)
                         for name in os.listdir(REPORTS_DIR)
                         if name.endswith('.json')]
                # Oldest first, so a newer report replaces an older one.
                paths.sort(key=os.path.getmtime)
                latest = {}
                for path in paths:
                    try:
                        with open(path) as f:
                            data = json.load(f)
                    except (OSError, ValueError) as e:
                        print(f'Skipping unreadable Trivy report {path}: {e}')
                        continue
                    if not isinstance(data, dict):
                        continue
                    tally = Tally()
                    for result in data.get('Results') or []:
                        for vuln in result.get('Vulnerabilities') or []:
                            tally[vuln.get('Severity', 'unknown').lower()] += 1
                    latest[data.get('ArtifactName', 'unknown')] = tally
                # Drop series for images or severities that disappeared.
                vuln_count.clear()
                for image, tally in latest.items():
                    for severity, count in tally.items():
                        vuln_count.labels(severity=severity, image=image).set(count)
            except Exception as e:
                print(f'Error collecting Trivy metrics: {e}')

        # Start metrics server
        start_http_server(8888)
        print('Security metrics server started on port 8888')

        # Collection loop
        while True:
            collect_falco_metrics()
            collect_trivy_metrics()
            time.sleep(60)
        PYEOF

        exec python3 /app/security_metrics.py
    volumes:
      - falco_logs:/var/log/falco:ro
      - trivy_reports:/reports:ro
      - clamav_logs:/var/log/clamav:ro
      - suricata_logs:/var/log/suricata:ro
    networks:
      - monitoring-network
    ports:
      - "8888:8888"  # Prometheus metrics endpoint
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.05'
      placement:
        constraints:
          - "node.labels.role==monitor"

volumes:
  falco_rules:
    driver: local
  falco_logs:
    driver: local
  falco_sidekick_config:
    driver: local
  # Bind mount of a host directory. Override SURICATA_CONFIG_DIR to deploy
  # from another checkout; the default is the previously hard-coded path.
  suricata_config:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: ${SURICATA_CONFIG_DIR:-/home/jonathan/Coding/HomeAudit/stacks/monitoring/suricata-config}
  suricata_logs:
    driver: local
  suricata_rules:
    driver: local
  trivy_cache:
    driver: local
  trivy_reports:
    driver: local
  clamav_db:
    driver: local
  clamav_logs:
    driver: local

# All networks are created outside this stack and must exist before deploy.
networks:
  monitoring-network:
    external: true
  traefik-public:
    external: true
  database-network:
    external: true