# FILE: infra/compose/production/monitoring.yaml
# Production Monitoring Stack for AI Tax Agent
# Deploy to: /opt/compose/ai-tax-agent/monitoring.yaml
#
# Services defined here:
#   apa-prometheus - metrics collection (30d TSDB retention)
#   apa-grafana    - dashboards, with generic OAuth login configured
#   apa-loki       - log aggregation
#   apa-promtail   - ships host and Docker container logs to Loki
#
# Variables interpolated by Compose (must be provided by the environment
# or an env file): DOMAIN, GRAFANA_PASSWORD, GRAFANA_OAUTH_CLIENT_ID,
# GRAFANA_OAUTH_CLIENT_SECRET.

# Both networks are declared external: this file does not create them, so
# they must already exist before the stack is brought up.
networks:
  frontend:
    external: true
    name: apa-frontend
  backend:
    external: true
    name: apa-backend

# Named volumes holding each service's persistent state.
volumes:
  prometheus_data:
  grafana_data:
  loki_data:

services:
  # Metrics Collection
  apa-prometheus:
    image: prom/prometheus:v2.48.1
    container_name: apa-prometheus
    restart: unless-stopped
    networks:
      - backend
      - frontend
    volumes:
      - prometheus_data:/prometheus
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      - "--storage.tsdb.retention.time=30d"
      # Enables Prometheus's HTTP lifecycle (reload/quit) endpoints. They are
      # served on the same port that the router below exposes, so they sit
      # behind the forward-auth middleware as well.
      - "--web.enable-lifecycle"
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN}`)"
      - "traefik.http.routers.prometheus.entrypoints=websecure"
      - "traefik.http.routers.prometheus.tls=true"
      - "traefik.http.routers.prometheus.tls.certresolver=godaddy"
      # Middleware is referenced with the @file suffix, i.e. it is expected to
      # be defined in Traefik's file provider, not in this Compose file.
      - "traefik.http.routers.prometheus.middlewares=authentik-forwardauth@file"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"

  # Visualization & Dashboards
  apa-grafana:
    image: grafana/grafana:10.2.3
    container_name: apa-grafana
    restart: unless-stopped
    networks:
      - backend
      - frontend
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    # Boolean-looking values are quoted on purpose: environment variables are
    # strings, and quoting keeps the YAML parser from retyping them (some
    # Compose implementations reject bare booleans here).
    environment:
      GF_SECURITY_ADMIN_PASSWORD: ${GRAFANA_PASSWORD}
      GF_USERS_ALLOW_SIGN_UP: "false"
      GF_USERS_AUTO_ASSIGN_ORG: "true"
      GF_USERS_AUTO_ASSIGN_ORG_ROLE: Viewer
      GF_AUTH_GENERIC_OAUTH_ENABLED: "true"
      GF_AUTH_GENERIC_OAUTH_NAME: Authentik
      GF_AUTH_GENERIC_OAUTH_CLIENT_ID: ${GRAFANA_OAUTH_CLIENT_ID}
      GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: ${GRAFANA_OAUTH_CLIENT_SECRET}
      GF_AUTH_GENERIC_OAUTH_SCOPES: openid profile email groups
      GF_AUTH_GENERIC_OAUTH_AUTH_URL: https://auth.${DOMAIN}/application/o/authorize/
      GF_AUTH_GENERIC_OAUTH_TOKEN_URL: https://auth.${DOMAIN}/application/o/token/
      GF_AUTH_GENERIC_OAUTH_API_URL: https://auth.${DOMAIN}/application/o/userinfo/
      GF_AUTH_GENERIC_OAUTH_AUTO_LOGIN: "false"
      GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP: "true"
      GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH: role
      GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_STRICT: "false"
      GF_AUTH_GENERIC_OAUTH_GROUPS_ATTRIBUTE_PATH: groups
      GF_AUTH_OAUTH_AUTO_LOGIN: "false"
      GF_AUTH_DISABLE_LOGIN_FORM: "false"
      GF_SERVER_ROOT_URL: https://grafana.${DOMAIN}
      GF_SERVER_SERVE_FROM_SUB_PATH: "false"
      GF_SECURITY_COOKIE_SECURE: "true"
      GF_SECURITY_COOKIE_SAMESITE: lax
      GF_AUTH_GENERIC_OAUTH_USE_PKCE: "true"
    # Unlike the Prometheus and Loki routers, no forward-auth middleware is
    # attached here; access control is left to Grafana's own login form and
    # the generic OAuth settings above.
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN}`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls=true"
      - "traefik.http.routers.grafana.tls.certresolver=godaddy"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"

  # Log Aggregation
  apa-loki:
    image: grafana/loki:2.9.4
    container_name: apa-loki
    restart: unless-stopped
    networks:
      - backend
      - frontend
    volumes:
      - loki_data:/loki
      - ./loki/loki.yml:/etc/loki/local-config.yaml:ro
    command: "-config.file=/etc/loki/local-config.yaml"
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.loki.rule=Host(`loki.${DOMAIN}`)"
      - "traefik.http.routers.loki.entrypoints=websecure"
      - "traefik.http.routers.loki.tls=true"
      - "traefik.http.routers.loki.tls.certresolver=godaddy"
      - "traefik.http.routers.loki.middlewares=authentik-forwardauth@file"
      - "traefik.http.services.loki.loadbalancer.server.port=3100"

  # Log Shipper (for Docker containers)
  apa-promtail:
    image: grafana/promtail:2.9.4
    container_name: apa-promtail
    restart: unless-stopped
    # Backend network only, and no Traefik labels: this service is not
    # published through the reverse proxy.
    networks:
      - backend
    volumes:
      # Host log directories are mounted read-only.
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - ./loki/promtail-config.yml:/etc/promtail/config.yml:ro
    command: "-config.file=/etc/promtail/config.yml"
    # Short-form depends_on: orders container start after apa-loki, but does
    # not wait for Loki to be ready to accept pushes.
    depends_on:
      - apa-loki