Files
ai-tax-agent/apps/svc_coverage/main.py
harkon b324ff09ef
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Initial commit
2025-10-11 08:41:36 +01:00

524 lines
16 KiB
Python

# FILE: apps/svc-coverage/main.py
# Coverage policy service with evaluation, clarification, and hot reload
import os
import sys
from typing import Any
import structlog
from fastapi import Depends, HTTPException
from pydantic import BaseModel
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from libs.app_factory import create_app
from libs.config import BaseAppSettings, create_event_bus, create_neo4j_client
from libs.coverage import CoverageEvaluator
from libs.events import EventBus
from libs.neo import Neo4jClient
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.policy import PolicyLoader, get_policy_loader
from libs.schemas import (
ClarifyContext,
ClarifyResponse,
CoverageGap,
CoverageReport,
PolicyError,
UploadOption,
ValidationResult,
)
from libs.security import get_current_user, get_tenant_id
# Module-level structlog logger used by the handlers and helpers in this file.
logger = structlog.get_logger()
async def http_exception_handler(_request, exc) -> dict[str, str | int]:
"""Handle HTTP exceptions"""
return {"detail": exc.detail, "status_code": exc.status_code}
class CoverageSettings(BaseAppSettings):
    """Configuration values for the coverage service, on top of ``BaseAppSettings``."""

    # Name this service identifies itself with.
    service_name: str = "svc-coverage"

    # Policy configuration. ``config_dir`` is the directory passed to the
    # policy loader at startup; ``policy_reload_enabled`` is declared here but
    # is not read by the code in this file.
    config_dir: str = "config"
    policy_reload_enabled: bool = True

    # Database connection URL (local placeholder default).
    postgres_url: str = "postgresql://user:pass@localhost:5432/coverage"

    # Base URL of the RAG retriever service.
    rag_service_url: str = "http://svc-rag-retriever:8000"
# Create app and settings
# create_app returns the application object together with the settings
# instance built from CoverageSettings; both are used throughout this module.
app, settings = create_app(
service_name="svc-coverage",
title="Tax Agent Coverage Policy Service",
description="Coverage policy evaluation and clarification service",
settings_class=CoverageSettings,
)
# Global state
# All four start as None and are assigned in startup_event(). Handlers must
# treat None as "not initialised yet".
neo4j_client: Neo4jClient | None = None
event_bus: EventBus | None = None
policy_loader: PolicyLoader | None = None
# Compiled policy produced by policy_loader.compile_predicates(); stays None
# when the initial load fails, and is replaced by the reload endpoint.
current_policy: Any = None
@app.on_event("startup")
async def startup_event() -> None:
    """Set up observability, the Neo4j client, the event bus and the policy.

    A failure while loading or compiling the initial policy is logged and
    leaves ``current_policy`` as ``None``; startup still completes.
    """
    global neo4j_client, event_bus, policy_loader, current_policy

    setup_observability(settings)

    # Wrap the configured driver in the knowledge-graph client.
    neo4j_client = Neo4jClient(create_neo4j_client(settings))

    event_bus = create_event_bus(settings)

    policy_loader = get_policy_loader(settings.config_dir)

    # Load and compile the first policy; errors here are tolerated.
    try:
        loaded = policy_loader.load_policy()
        current_policy = policy_loader.compile_predicates(loaded)
        logger.info("Initial policy loaded", version=loaded.version)
    except Exception as exc:
        logger.error("Failed to load initial policy", error=str(exc))
        current_policy = None

    logger.info("Coverage service started")
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Close the Neo4j client and the event bus, if they were created."""
    # Graph client first, then the event bus; unset (falsy) entries are skipped.
    for dependency in (neo4j_client, event_bus):
        if dependency:
            await dependency.close()

    logger.info("Coverage service stopped")
# Request/Response models
class CheckCoverageRequest(BaseModel):
    """Body of a coverage-check call: a tax year and a taxpayer identifier."""

    tax_year: str
    taxpayer_id: str
class ClarifyRequest(BaseModel):
    """Body of a clarification call: the coverage gap plus its context."""

    gap: CoverageGap
    context: ClarifyContext
class ReloadRequest(BaseModel):
    """Body of a policy-reload call; ``force`` defaults to ``False``."""

    force: bool = False
# Metrics
# Module-level handles obtained at import time. The endpoints below use
# `metrics` to increment counters and `tracer` to open one span per request.
metrics = get_metrics()
tracer = get_tracer()
@app.post("/v1/coverage/check")
async def check_coverage(
    request: CheckCoverageRequest,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> CoverageReport:
    """Evaluate document coverage for one taxpayer and tax year.

    Responds with 503 while no compiled policy is available. Any failure
    other than an ``HTTPException`` is logged and reported as a 500.
    """
    taxpayer_id = request.taxpayer_id
    tax_year = request.tax_year

    with tracer.start_as_current_span("check_coverage") as span:
        span.set_attribute("taxpayer_id", taxpayer_id)
        span.set_attribute("tax_year", tax_year)
        span.set_attribute("tenant_id", tenant_id)

        try:
            if not current_policy:
                raise HTTPException(status_code=503, detail="Policy not loaded")

            # The evaluator gets the KG client only; no RAG client exists yet.
            evaluator = CoverageEvaluator(
                kg_client=neo4j_client,
                rag_client=None,  # TODO: Initialize RAG client
            )
            report = await evaluator.check_document_coverage(
                taxpayer_id, tax_year, current_policy
            )

            # Audit trail first, then the per-tenant counter.
            await _record_coverage_audit(report, tenant_id)

            checks_counter = metrics.counter("coverage_checks_total")
            checks_counter.labels(
                tenant_id=tenant_id,
                tax_year=tax_year,
                overall_status=report.overall_status.value,
            ).inc()

            return report
        except HTTPException:
            # Deliberate HTTP errors pass through unchanged.
            raise
        except Exception as exc:
            logger.error(
                "Coverage check failed",
                taxpayer_id=taxpayer_id,
                tax_year=tax_year,
                error=str(exc),
            )
            raise HTTPException(
                status_code=500, detail=f"Coverage check failed: {str(exc)}"
            ) from exc
@app.post("/v1/coverage/clarify")
async def clarify_gap(
    request: ClarifyRequest,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> ClarifyResponse:
    """Build a clarifying question for a single coverage gap.

    Responds with 503 while no compiled policy is available. Any failure
    other than an ``HTTPException`` is logged and reported as a 500.
    """
    gap = request.gap

    with tracer.start_as_current_span("clarify_gap") as span:
        span.set_attribute("schedule_id", gap.schedule_id)
        span.set_attribute("evidence_id", gap.evidence_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            if not current_policy:
                raise HTTPException(status_code=503, detail="Policy not loaded")

            answer = await _generate_clarifying_question(gap, request.context)

            clarifications_counter = metrics.counter("clarifications_total")
            clarifications_counter.labels(
                tenant_id=tenant_id,
                schedule_id=gap.schedule_id,
                evidence_id=gap.evidence_id,
            ).inc()

            return answer
        except HTTPException:
            # Deliberate HTTP errors pass through unchanged.
            raise
        except Exception as exc:
            logger.error(
                "Clarification failed",
                gap=gap.dict(),
                error=str(exc),
            )
            raise HTTPException(
                status_code=500, detail=f"Clarification failed: {str(exc)}"
            ) from exc
@app.post("/admin/coverage/reload")
async def reload_policy(
    request: ReloadRequest,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Reload the coverage policy from its source files (admin only).

    The policy is loaded and compiled, the new version is recorded, and only
    then is the module-level ``current_policy`` replaced, so a failure at any
    step leaves the previous policy in place. ``request.force`` is recorded
    on the tracing span only; the reload always runs.

    Returns:
        A dict with ``success``, ``version``, ``hash``, ``compiled_at``
        (ISO format) and ``source_files`` of the newly compiled policy.

    Raises:
        HTTPException: 403 when the caller is not in the ``admin`` group,
            503 when the policy loader has not been initialised, 400 on a
            ``PolicyError``, and 500 on any other failure.
    """
    global current_policy

    # Check admin permissions. ``or []`` also covers a "groups" key that is
    # present but None, which would otherwise crash the membership test.
    user_groups = current_user.get("groups") or []
    if "admin" not in user_groups:
        raise HTTPException(status_code=403, detail="Admin access required")

    # NOTE(review): settings.policy_reload_enabled is not consulted here.
    # Confirm whether this endpoint should honour it.
    with tracer.start_as_current_span("reload_policy") as span:
        span.set_attribute("tenant_id", tenant_id)
        span.set_attribute("force", request.force)

        try:
            if not policy_loader:
                raise HTTPException(
                    status_code=503, detail="Policy loader not initialized"
                )

            # Load and compile new policy
            policy = policy_loader.load_policy()
            new_compiled_policy = policy_loader.compile_predicates(policy)

            # Record new policy version before switching over
            await _record_policy_version(new_compiled_policy, tenant_id)

            # Update current policy
            current_policy = new_compiled_policy

            logger.info(
                "Policy reloaded",
                version=policy.version,
                hash=new_compiled_policy.hash,
                tenant_id=tenant_id,
            )

            return {
                "success": True,
                "version": policy.version,
                "hash": new_compiled_policy.hash,
                "compiled_at": new_compiled_policy.compiled_at.isoformat(),
                "source_files": new_compiled_policy.source_files,
            }
        except HTTPException:
            # Re-raise HTTP exceptions as-is. Without this clause the 503
            # above falls into ``except Exception`` and is reported as a 500,
            # unlike the check and clarify endpoints.
            raise
        except PolicyError as e:
            logger.error("Policy reload failed", error=str(e))
            raise HTTPException(
                status_code=400, detail=f"Policy error: {str(e)}"
            ) from e
        except Exception as e:
            logger.error("Policy reload failed", error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Reload failed: {str(e)}"
            ) from e
@app.get("/v1/coverage/policy")
async def get_current_policy(
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Return a summary of the compiled policy (no secrets, no PII).

    Responds with 503 while no compiled policy is available.
    """
    with tracer.start_as_current_span("get_policy") as span:
        span.set_attribute("tenant_id", tenant_id)

        compiled = current_policy
        if not compiled:
            raise HTTPException(status_code=503, detail="Policy not loaded")

        # Only selected metadata is exposed, never the full policy object.
        policy = compiled.policy
        return {
            "version": policy.version,
            "jurisdiction": policy.jurisdiction,
            "tax_year": policy.tax_year,
            "compiled_at": compiled.compiled_at.isoformat(),
            "hash": compiled.hash,
            "source_files": compiled.source_files,
            "schedules": list(policy.schedules.keys()),
            "document_kinds": policy.document_kinds,
        }
@app.get("/v1/coverage/validate")
async def validate_policy(
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> ValidationResult:
    """Validate the policy configuration on disk.

    Loads ``coverage.yaml`` from ``settings.config_dir`` and runs the loader's
    validation. When that passes and a Neo4j client is available, it also
    checks that every referenced box exists in the knowledge graph; any
    missing box is added to ``errors`` and ``ok`` is set to ``False``.

    Returns:
        The ``ValidationResult``. Unexpected failures during validation are
        reported as a result with ``ok=False`` rather than as an HTTP error.

    Raises:
        HTTPException: 503 when the policy loader has not been initialised.
    """
    with tracer.start_as_current_span("validate_policy") as span:
        span.set_attribute("tenant_id", tenant_id)

        try:
            if not policy_loader:
                raise HTTPException(
                    status_code=503, detail="Policy loader not initialized"
                )

            # Load policy as dict for validation.
            # NOTE(review): this calls the loader's private _load_yaml_file;
            # a public accessor would be preferable if one exists.
            policy_dict = policy_loader._load_yaml_file(
                os.path.join(settings.config_dir, "coverage.yaml")
            )

            # Validate policy
            result = policy_loader.validate_policy(policy_dict)

            # Additional validation: check box existence in KG
            if neo4j_client and result.ok:
                box_validation_errors = await _validate_boxes_in_kg(policy_dict)
                if box_validation_errors:
                    result.errors.extend(box_validation_errors)
                    result.ok = False

            return result
        except HTTPException:
            # Re-raise HTTP exceptions as-is. Without this clause the 503
            # above is caught below and returned as a 200 with ok=False.
            raise
        except Exception as e:
            logger.error("Policy validation failed", error=str(e))
            return ValidationResult(
                ok=False,
                errors=[f"Validation failed: {str(e)}"],
            )
# Helper functions
async def _record_coverage_audit(report: CoverageReport, tenant_id: str) -> None:
    """Log an audit entry for a coverage report.

    Currently this only writes a log line; nothing is persisted.
    """
    # TODO: Implement database recording
    audit_fields = {
        "taxpayer_id": report.taxpayer_id,
        "tax_year": report.tax_year,
        "overall_status": report.overall_status.value,
        "blocking_items": len(report.blocking_items),
        "tenant_id": tenant_id,
    }
    logger.info("Coverage audit recorded", **audit_fields)
async def _record_policy_version(compiled_policy: Any, tenant_id: str) -> None:
    """Log the version and hash of a compiled policy for the given tenant.

    Currently this only writes a log line; nothing is persisted.
    """
    # TODO: Implement database recording
    version = compiled_policy.policy.version
    policy_hash = compiled_policy.hash
    logger.info(
        "Policy version recorded",
        version=version,
        hash=policy_hash,
        tenant_id=tenant_id,
    )
async def _generate_clarifying_question(
    gap: CoverageGap, context: ClarifyContext
) -> ClarifyResponse:
    """Build the clarifying question and upload options for a coverage gap.

    The question and its rationale come from the policy's default question
    template, filled with the gap's schedule, evidence, boxes and acceptable
    alternatives. One upload option is offered per acceptable alternative;
    when there are none, a single option for the evidence itself is offered.

    Args:
        gap: The coverage gap to ask about.
        context: Supplies the tax year used in the question text.

    Returns:
        A ``ClarifyResponse``; ``blocking`` is true when the gap's role value
        is ``"REQUIRED"``.

    Raises:
        ValueError: If no compiled policy is loaded.
    """
    # Local import keeps this fix self-contained in the function.
    from urllib.parse import quote

    if not current_policy:
        raise ValueError("Policy not loaded")

    # Get question template
    default_template = current_policy.policy.question_templates.default

    # Build question text, with generic wording when lists are empty
    evidence_name = gap.evidence_id
    alternatives = gap.acceptable_alternatives
    boxes_text = ", ".join(gap.boxes) if gap.boxes else "relevant boxes"
    alternatives_text = (
        ", ".join(alternatives) if alternatives else "alternative documents"
    )

    question_text = default_template["text"].format(
        schedule=gap.schedule_id,
        tax_year=context.tax_year,
        evidence=evidence_name,
        boxes=boxes_text,
        alternatives=alternatives_text,
    )
    why_text = default_template["why"].format(
        why=gap.reason,
        guidance_doc="policy guidance",
    )

    # Build upload options. The tag goes into a URL query string, so it is
    # percent-encoded; otherwise a space, '&', '#' or '+' in an identifier
    # would produce a malformed or ambiguous URL. Identifiers made only of
    # letters, digits, '_', '.', '-' and '~' are left unchanged.
    upload_tags = alternatives if alternatives else [evidence_name]
    options = [
        UploadOption(
            label=f"Upload {tag} (PDF/CSV)",
            accepted_formats=["pdf", "csv"],
            upload_endpoint=f"/v1/ingest/upload?tag={quote(f'{tag}', safe='')}",
        )
        for tag in upload_tags
    ]

    return ClarifyResponse(
        question_text=question_text,
        why_it_is_needed=why_text,
        citations=gap.citations,
        options_to_provide=options,
        blocking=(gap.role.value == "REQUIRED"),
        boxes_affected=gap.boxes,
    )
async def _validate_boxes_in_kg(policy_dict: dict[str, Any]) -> list[str]:
    """Return one error message per policy box missing from the knowledge graph.

    Boxes are gathered from every evidence entry of every schedule in
    ``policy_dict``. A missing KG client, or a failure during the lookup, is
    itself reported as an error message rather than raised.
    """
    if not neo4j_client:
        return ["KG client not available for box validation"]

    problems: list[str] = []

    # De-duplicated set of every box referenced anywhere in the policy.
    referenced_boxes = {
        box
        for schedule in policy_dict.get("schedules", {}).values()
        for evidence in schedule.get("evidence", [])
        for box in evidence.get("boxes", [])
    }

    if referenced_boxes:
        try:
            from libs.neo import kg_boxes_exist

            existence = await kg_boxes_exist(neo4j_client, list(referenced_boxes))
            problems.extend(
                f"Form box '{box_id}' not found in knowledge graph"
                for box_id, found in existence.items()
                if not found
            )
        except Exception as exc:
            problems.append(f"Failed to validate boxes in KG: {str(exc)}")

    return problems
# Health check endpoints
@app.get("/healthz")
async def health_check() -> dict[str, str]:
    """Report a fixed "healthy" status for this service."""
    return dict(status="healthy", service="svc-coverage")
@app.get("/readyz")
async def readiness_check() -> dict[str, str]:
    """Report a fixed "ready" status; no dependency state is inspected."""
    return dict(status="ready", service="svc-coverage")
@app.get("/livez")
async def liveness_check() -> dict[str, str]:
    """Report a fixed "alive" status for this service."""
    return dict(status="alive", service="svc-coverage")
# Metrics endpoint (internal only)
@app.get("/metrics")
async def get_metrics_endpoint() -> str:
    """Placeholder metrics endpoint; returns a single fixed comment line."""
    # Real Prometheus-format output is not produced here yet.
    placeholder = "# Coverage service metrics\n"
    return placeholder
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 8000.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")