Files
ai-tax-agent/infra/base/monitoring.yaml
harkon f0f7674b8d
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
clean up base infra
2025-10-11 11:42:43 +01:00

126 lines
4.5 KiB
YAML

# FILE: infra/compose/production/monitoring.yaml
# Production Monitoring Stack for AI Tax Agent
# Deploy to: /opt/compose/ai-tax-agent/monitoring.yaml
networks:
frontend:
external: true
name: apa-frontend
backend:
external: true
name: apa-backend
volumes:
prometheus_data:
grafana_data:
loki_data:
services:
# Metrics Collection
apa-prometheus:
image: prom/prometheus:v2.48.1
container_name: apa-prometheus
restart: unless-stopped
networks:
- backend
- frontend
volumes:
- prometheus_data:/prometheus
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
command:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus"
- "--web.console.libraries=/etc/prometheus/console_libraries"
- "--web.console.templates=/etc/prometheus/consoles"
- "--storage.tsdb.retention.time=30d"
- "--web.enable-lifecycle"
labels:
- "traefik.enable=true"
- "traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN}`)"
- "traefik.http.routers.prometheus.entrypoints=websecure"
- "traefik.http.routers.prometheus.tls=true"
- "traefik.http.routers.prometheus.tls.certresolver=godaddy"
- "traefik.http.routers.prometheus.middlewares=authentik-forwardauth@file"
- "traefik.http.services.prometheus.loadbalancer.server.port=9090"
# Visualization & Dashboards
apa-grafana:
image: grafana/grafana:10.2.3
container_name: apa-grafana
restart: unless-stopped
networks:
- backend
- frontend
volumes:
- grafana_data:/var/lib/grafana
- ./grafana/provisioning:/etc/grafana/provisioning:ro
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
environment:
GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD}
GF_USERS_ALLOW_SIGN_UP: false
GF_USERS_AUTO_ASSIGN_ORG: true
GF_USERS_AUTO_ASSIGN_ORG_ROLE: Viewer
GF_AUTH_GENERIC_OAUTH_ENABLED: true
GF_AUTH_GENERIC_OAUTH_NAME: Authentik
GF_AUTH_GENERIC_OAUTH_CLIENT_ID: ${GRAFANA_OAUTH_CLIENT_ID}
GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: ${GRAFANA_OAUTH_CLIENT_SECRET}
GF_AUTH_GENERIC_OAUTH_SCOPES: openid profile email groups
GF_AUTH_GENERIC_OAUTH_AUTH_URL: https://auth.${DOMAIN}/application/o/authorize/
GF_AUTH_GENERIC_OAUTH_TOKEN_URL: https://auth.${DOMAIN}/application/o/token/
GF_AUTH_GENERIC_OAUTH_API_URL: https://auth.${DOMAIN}/application/o/userinfo/
GF_AUTH_GENERIC_OAUTH_AUTO_LOGIN: false
GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP: true
GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: role
GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_STRICT: false
GF_AUTH_GENERIC_OAUTH_GROUPS_ATTRIBUTE_PATH: groups
GF_AUTH_OAUTH_AUTO_LOGIN: false
GF_AUTH_DISABLE_LOGIN_FORM: false
GF_SERVER_ROOT_URL: https://grafana.${DOMAIN}
GF_SERVER_SERVE_FROM_SUB_PATH: false
GF_SECURITY_COOKIE_SECURE: true
GF_SECURITY_COOKIE_SAMESITE: lax
GF_AUTH_GENERIC_OAUTH_USE_PKCE: true
labels:
- "traefik.enable=true"
- "traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN}`)"
- "traefik.http.routers.grafana.entrypoints=websecure"
- "traefik.http.routers.grafana.tls=true"
- "traefik.http.routers.grafana.tls.certresolver=godaddy"
- "traefik.http.services.grafana.loadbalancer.server.port=3000"
# Log Aggregation
apa-loki:
image: grafana/loki:2.9.4
container_name: apa-loki
restart: unless-stopped
networks:
- backend
- frontend
volumes:
- loki_data:/loki
- ./loki/loki.yml:/etc/loki/local-config.yaml:ro
command: -config.file=/etc/loki/local-config.yaml
labels:
- "traefik.enable=true"
- "traefik.http.routers.loki.rule=Host(`loki.${DOMAIN}`)"
- "traefik.http.routers.loki.entrypoints=websecure"
- "traefik.http.routers.loki.tls=true"
- "traefik.http.routers.loki.tls.certresolver=godaddy"
- "traefik.http.routers.loki.middlewares=authentik-forwardauth@file"
- "traefik.http.services.loki.loadbalancer.server.port=3100"
# Log Shipper (for Docker containers)
apa-promtail:
image: grafana/promtail:2.9.4
container_name: apa-promtail
restart: unless-stopped
networks:
- backend
volumes:
- /var/log:/var/log:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- ./loki/promtail-config.yml:/etc/promtail/config.yml:ro
command: -config.file=/etc/promtail/config.yml
depends_on:
- apa-loki