# HomeAudit/stacks/monitoring/final-monitoring.yml

# Compose file format version (quoted so it stays a string, not the float 3.9).
version: '3.9'
# Service definitions for the monitoring stack; each entry below is one service.
services:
  # Prometheus server. Reads its scrape configuration from the read-only bind
  # mount of /opt/configs/monitoring/prometheus-production.yml and keeps its
  # TSDB in the prometheus_data named volume.
  prometheus:
    image: prom/prometheus:v2.47.0
    command:
    - --config.file=/etc/prometheus/prometheus.yml
    - --storage.tsdb.path=/prometheus
    - --web.console.libraries=/etc/prometheus/console_libraries
    - --web.console.templates=/etc/prometheus/consoles
    # Keep 30 days of samples.
    - --storage.tsdb.retention.time=30d
    # Enables the HTTP reload/quit endpoints (/-/reload, /-/quit).
    # NOTE(review): these are unauthenticated; make sure the reverse proxy does
    # not forward /-/* from untrusted clients.
    - --web.enable-lifecycle
    # --web.enable-admin-api is deliberately NOT set: it exposes unauthenticated
    # delete-series / snapshot / clean-tombstones endpoints, and this service is
    # published on a host port and attached to the public proxy network.
    # Re-enable it only temporarily and only on a trusted network.
    volumes:
    - prometheus_data:/prometheus
    - /opt/configs/monitoring/prometheus-production.yml:/etc/prometheus/prometheus.yml:ro
    networks:
    - monitoring-network
    - caddy-public
    ports:
    # host:container. Quoted so the mapping is always read as a string.
    - '9091:9090'
    healthcheck:
      # Exec form (no shell): probe Prometheus' own health endpoint.
      test:
      - CMD
      - wget
      - --no-verbose
      - --tries=1
      - --spider
      - http://localhost:9090/-/healthy
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
      placement:
        constraints:
        # Bind-mounted config and the local volume live on the manager node.
        - node.role == manager
      # NOTE(review): these are Traefik labels, but the attached proxy network
      # is named caddy-public; confirm which reverse proxy actually reads them.
      labels:
      - traefik.enable=true
      - traefik.http.routers.prometheus.rule=Host(`prometheus.pressmess.duckdns.org`)
      - traefik.http.routers.prometheus.entrypoints=websecure
      - traefik.http.routers.prometheus.tls=true
      - traefik.http.services.prometheus.loadbalancer.server.port=9090

  # node_exporter: host-level metrics. The host's /proc, /sys and / are
  # bind-mounted read-only under /host/* and the --path.* flags point the
  # exporter at those mount points instead of the container's own filesystem.
  node-exporter:
    image: prom/node-exporter:v1.6.1
    command:
    - --path.procfs=/host/proc
    - --path.sysfs=/host/sys
    - --path.rootfs=/host/root
    - --web.listen-address=:9100
    # In the regexes below, `$$` is the Compose escape for a literal `$`, so
    # the end-of-string anchor reaches node_exporter instead of being treated
    # as variable interpolation.
    - --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
    - --collector.filesystem.fs-types-exclude=^(sys|proc|auto)fs$$
    - --collector.netdev.device-exclude=^(lo|docker0|veth.*)$$
    volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/host/root:ro
    networks:
    - monitoring-network
    ports:
    # host:container. Quoted so the mapping is always read as a string.
    - '9100:9100'
    healthcheck:
      # Probe the landing page at "/". The previous target, /-/healthy, is a
      # Prometheus/blackbox_exporter convention and not a documented
      # node_exporter route; it appears to have passed only because the
      # landing-page handler is mounted at "/" and answers for any path.
      test:
      - CMD
      - wget
      - --no-verbose
      - --tries=1
      - --spider
      - http://localhost:9100/
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    deploy:
      resources:
        limits:
          memory: 256M
          cpus: '0.25'
        reservations:
          memory: 128M
          cpus: '0.1'
      placement:
        constraints:
        # NOTE(review): with this constraint only the manager node is
        # monitored; confirm that is intended if the swarm has workers.
        - node.role == manager

  # blackbox_exporter: probing exporter. Its module configuration comes from
  # the read-only bind mount of /opt/configs/monitoring/blackbox.yml.
  blackbox-exporter:
    image: prom/blackbox-exporter:v0.24.0
    command:
    - --config.file=/etc/blackbox_exporter/blackbox.yml
    networks:
    - monitoring-network
    ports:
    # host:container
    - '9115:9115'
    volumes:
    - /opt/configs/monitoring/blackbox.yml:/etc/blackbox_exporter/blackbox.yml:ro
    healthcheck:
      start_period: 30s
      interval: 30s
      timeout: 10s
      retries: 3
      # Exec form (no shell): a single spider request against the health endpoint.
      test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:9115/-/healthy"]
    deploy:
      placement:
        constraints:
        # The bind-mounted config file is expected on the manager node.
        - node.role == manager
      resources:
        reservations:
          cpus: '0.1'
          memory: 128M
        limits:
          cpus: '0.25'
          memory: 256M

  # Grafana UI. State lives in the grafana_data volume; datasources and
  # dashboards are provisioned from read-only bind mounts under
  # /opt/configs/monitoring/provisioning.
  grafana:
    image: grafana/grafana:10.1.2
    environment:
      GF_PROVISIONING_PATH: /etc/grafana/provisioning
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel
      GF_FEATURE_TOGGLES_ENABLE: publicDashboards
      # The admin password is no longer hard-coded in the stack file. It is
      # interpolated from the deploying shell's environment, and the `:?` form
      # aborts the deploy with the given message when the variable is unset or
      # empty, e.g.:
      #   GRAFANA_ADMIN_PASSWORD='...' docker stack deploy -c final-monitoring.yml monitoring
      # NOTE(review): Grafana applies this value when the admin account is
      # first created; an existing grafana_data volume keeps its current
      # password. A Docker secret with GF_SECURITY_ADMIN_PASSWORD__FILE is a
      # stronger follow-up.
      GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}"
    volumes:
    - grafana_data:/var/lib/grafana
    - /opt/configs/monitoring/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
    - /opt/configs/monitoring/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
    networks:
    - monitoring-network
    - caddy-public
    ports:
    # host:container. Quoted so the mapping is always read as a string.
    - '3002:3000'
    healthcheck:
      # Shell form: curl -f exits non-zero on HTTP errors, marking the task unhealthy.
      test:
      - CMD-SHELL
      - curl -f http://localhost:3000/api/health || exit 1
      interval: 30s
      timeout: 10s
      retries: 3
      # Longer grace period than the exporters: plugins listed in
      # GF_INSTALL_PLUGINS are installed at container start.
      start_period: 60s
    deploy:
      resources:
        limits:
          memory: 1G
          cpus: '0.5'
        reservations:
          memory: 512M
          cpus: '0.25'
      placement:
        constraints:
        # Bind-mounted provisioning and the local volume live on the manager node.
        - node.role == manager
      # NOTE(review): these are Traefik labels, but the attached proxy network
      # is named caddy-public; confirm which reverse proxy actually reads them.
      labels:
      - traefik.enable=true
      - traefik.http.routers.grafana.rule=Host(`grafana.pressmess.duckdns.org`)
      - traefik.http.routers.grafana.entrypoints=websecure
      - traefik.http.routers.grafana.tls=true
      - traefik.http.services.grafana.loadbalancer.server.port=3000

# Named volumes for persistent state, both using the node-local driver.
volumes:
  # Prometheus TSDB (mounted at /prometheus).
  prometheus_data: { driver: local }
  # Grafana database and plugins (mounted at /var/lib/grafana).
  grafana_data: { driver: local }

# Both networks are external: they must already exist before this stack is
# deployed, and the stack neither creates nor removes them.
networks:
  # Internal network shared by Prometheus, the exporters and Grafana.
  monitoring-network: { external: true }
  # Reverse-proxy network; only Prometheus and Grafana attach to it.
  caddy-public: { external: true }