Files
ai-tax-agent/apps/svc_hmrc/main.py
harkon b324ff09ef
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Initial commit
2025-10-11 08:41:36 +01:00

760 lines
24 KiB
Python

# FILE: apps/svc-hmrc/main.py
# HMRC submission service with MTD API integration and validation
import asyncio
import json
import os
# Import shared libraries
import sys
from datetime import datetime
from typing import Any
import structlog
import ulid
from fastapi import BackgroundTasks, Depends, HTTPException, Request
from fastapi.responses import JSONResponse
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from libs.app_factory import create_app
from libs.config import (
BaseAppSettings,
create_event_bus,
create_neo4j_client,
create_vault_client,
)
from libs.events import EventBus, EventPayload, EventTopics
from libs.neo import Neo4jClient
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.schemas import ErrorResponse, HMRCSubmissionRequest, HMRCSubmissionResponse
from libs.security import VaultTransitHelper, get_current_user, get_tenant_id
logger = structlog.get_logger()
class HMRCSettings(BaseAppSettings):
    """Configuration for the HMRC submission service.

    Extends the shared ``BaseAppSettings`` with HMRC host names, OAuth client
    details, MTD endpoint templates and submission limits.
    """

    service_name: str = "svc-hmrc"

    # HMRC API hosts. ``use_sandbox`` decides which one the OAuth authorize URL
    # is built from and which environment the health endpoint reports.
    hmrc_base_url: str = "https://api.service.hmrc.gov.uk"
    hmrc_sandbox_url: str = "https://test-api.service.hmrc.gov.uk"
    use_sandbox: bool = True

    # OAuth client registration; the credentials default to empty strings.
    client_id: str = ""
    client_secret: str = ""
    redirect_uri: str = "http://localhost:8000/oauth/callback"

    # MTD endpoint path templates; the ``{...}` placeholders are left unformatted here.
    mtd_income_tax_endpoint: str = "/income-tax/self-assessment/ni/{nino}/uk-property/{taxYear}"
    mtd_self_employment_endpoint: str = "/income-tax/self-assessment/ni/{nino}/self-employment/{businessId}"

    # Submission limits.
    # NOTE(review): neither value is referenced in the visible part of this module.
    max_submission_retries: int = 3
    submission_timeout: int = 300  # 5 minutes
# Create app and settings
# create_app comes from libs.app_factory and is unpacked into the application
# object and its settings (presumably an HMRCSettings instance, given
# settings_class). Both are module-level because the route decorators and the
# handlers in this file reference `app` and `settings` directly.
app, settings = create_app(
service_name="svc-hmrc",
title="Tax Agent HMRC Service",
description="HMRC submission service with MTD API integration",
settings_class=HMRCSettings,
)
# Global clients
# These three start as None and are assigned by startup_event; handlers check
# them for None before use.
vault_helper: VaultTransitHelper | None = None
neo4j_client: Neo4jClient | None = None
event_bus: EventBus | None = None
# Tracer and metrics handles are created at import time and shared by all handlers.
tracer = get_tracer("svc-hmrc")
metrics = get_metrics()
@app.on_event("startup")
async def startup_event() -> None:
    """Build the module-level clients and start the event bus.

    Assigns the ``vault_helper``, ``neo4j_client`` and ``event_bus`` globals,
    starts the bus and subscribes ``_handle_form_filled`` to the
    ``FORM_FILLED`` topic.

    Raises:
        Exception: If ``create_event_bus`` returns a falsy value.
    """
    global vault_helper, neo4j_client, event_bus

    logger.info("Starting HMRC service")
    setup_observability(settings)

    # Vault transit helper, constructed with the "tax-agent-transit" name.
    vault_helper = VaultTransitHelper(
        create_vault_client(settings), "tax-agent-transit"
    )

    # Neo4j client wrapping the driver produced by the shared factory.
    neo4j_client = Neo4jClient(create_neo4j_client(settings))

    # Event bus: publish to the global first, then validate, as before.
    bus = create_event_bus(settings)
    event_bus = bus
    if not bus:
        raise Exception("Event bus not initialized")

    await bus.start()
    # Form completion events drive the (currently disabled) auto-submission path.
    await bus.subscribe(EventTopics.FORM_FILLED, _handle_form_filled)  # type: ignore

    logger.info("HMRC service started successfully")
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Release the Neo4j client and stop the event bus on shutdown.

    Either client may still be ``None`` (for example when startup failed part
    way through), in which case it is skipped.

    Raises:
        Exception: Whatever ``neo4j_client.close()`` or ``event_bus.stop()``
            raises is propagated after both have been attempted.
    """
    global neo4j_client, event_bus

    logger.info("Shutting down HMRC service")

    try:
        if neo4j_client:
            await neo4j_client.close()
    finally:
        # Stop the bus even when closing Neo4j failed; otherwise one failing
        # dependency would leave the other running.
        if event_bus:
            await event_bus.stop()

    logger.info("HMRC service shutdown complete")
@app.get("/health")
async def health_check() -> dict[str, Any]:
    """Report a static health payload for the service.

    Returns:
        A dict with a fixed ``"healthy"`` status, the service name and version
        from settings, the current ``datetime.utcnow()`` value in ISO format
        and which HMRC environment (sandbox or production) is configured.
    """
    environment = "sandbox" if settings.use_sandbox else "production"

    report: dict[str, Any] = {"status": "healthy"}
    report["service"] = settings.service_name
    report["version"] = settings.service_version
    report["timestamp"] = datetime.utcnow().isoformat()
    report["hmrc_environment"] = environment
    return report
@app.post("/submit", response_model=HMRCSubmissionResponse)
async def submit_to_hmrc(
    request_data: HMRCSubmissionRequest,
    background_tasks: BackgroundTasks,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> HMRCSubmissionResponse:
    """Accept a submission request and queue the actual work in the background.

    A new ULID is generated as the submission id, ``_submit_to_hmrc_async`` is
    scheduled on ``background_tasks`` and a ``"processing"`` response is
    returned immediately.

    Args:
        request_data: Tax year, taxpayer id and dry-run flag of the submission.
        background_tasks: FastAPI task queue the submission is scheduled on.
        current_user: Authenticated user claims; ``sub`` is used as the actor,
            falling back to ``"system"``.
        tenant_id: Tenant the request belongs to.

    Returns:
        An ``HMRCSubmissionResponse`` with status ``"processing"`` and no HMRC
        reference yet.

    Raises:
        HTTPException: 500 when anything fails while queuing the submission.
    """
    with tracer.start_as_current_span("submit_to_hmrc") as span:
        taxpayer = request_data.taxpayer_id
        is_dry_run = request_data.dry_run

        span_attributes = {
            "tax_year": request_data.tax_year,
            "taxpayer_id": taxpayer,
            "tenant_id": tenant_id,
            "dry_run": is_dry_run,
        }
        for attribute, value in span_attributes.items():
            span.set_attribute(attribute, value)

        try:
            submission_id = str(ulid.new())
            span.set_attribute("submission_id", submission_id)

            # Queue the real work; the HTTP response does not wait for it.
            acting_user = current_user.get("sub", "system")
            background_tasks.add_task(
                _submit_to_hmrc_async,
                request_data.tax_year,
                taxpayer,
                is_dry_run,
                tenant_id,
                submission_id,
                acting_user,
            )

            logger.info(
                "HMRC submission started",
                submission_id=submission_id,
                taxpayer_id=taxpayer,
                dry_run=is_dry_run,
            )

            accepted = HMRCSubmissionResponse(
                submission_id=submission_id,
                status="processing",
                hmrc_reference=None,
                submission_timestamp=datetime.utcnow(),
                validation_results={},
                dry_run=is_dry_run,
            )
            return accepted

        except Exception as e:
            logger.error("Failed to start HMRC submission", error=str(e))
            raise HTTPException(
                status_code=500, detail="Failed to start HMRC submission"
            )
@app.get("/submissions/{submission_id}")
async def get_submission_status(
    submission_id: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Look up a stored submission for the calling tenant.

    Args:
        submission_id: Identifier returned by the submit endpoint.
        current_user: Authenticated user claims (not read here).
        tenant_id: Tenant the lookup is restricted to.

    Returns:
        A dict with the submission's status, HMRC reference, timestamp,
        decoded validation results, dry-run flag and error message.

    Raises:
        HTTPException: 404 when no non-retracted submission matches, 500 for
            any other failure (including an uninitialised Neo4j client).
    """
    with tracer.start_as_current_span("get_submission_status") as span:
        span.set_attribute("submission_id", submission_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Match on both ids so one tenant cannot read another's submission.
            lookup = (
                "\n"
                "MATCH (s:Submission {submission_id: $submission_id, tenant_id: $tenant_id})\n"
                "WHERE s.retracted_at IS NULL\n"
                "RETURN s\n"
            )

            if not neo4j_client:
                raise Exception("Neo4j client not initialized")

            lookup_params = {"submission_id": submission_id, "tenant_id": tenant_id}
            rows = await neo4j_client.run_query(lookup, lookup_params)  # pyright: ignore[reportOptionalMemberAccess]

            if not rows:
                raise HTTPException(status_code=404, detail="Submission not found")

            node = rows[0]["s"]
            # Validation results are stored as a JSON string on the node.
            stored_validation = node.get("validation_results", "{}")

            return {
                "submission_id": submission_id,
                "status": node.get("status"),
                "hmrc_reference": node.get("hmrc_reference"),
                "submission_timestamp": node.get("submission_timestamp"),
                "validation_results": json.loads(stored_validation),
                "dry_run": node.get("dry_run", False),
                "error_message": node.get("error_message"),
            }

        except HTTPException:
            # Let the deliberate 404 through untouched.
            raise
        except Exception as e:
            logger.error(
                "Failed to get submission status",
                submission_id=submission_id,
                error=str(e),
            )
            raise HTTPException(
                status_code=500, detail="Failed to get submission status"
            )
@app.post("/oauth/authorize")
async def initiate_oauth_flow(
    taxpayer_id: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Build the HMRC authorization URL that starts the OAuth flow.

    A fresh ULID is used as the ``state`` parameter and handed to
    ``_store_oauth_state`` before the URL is returned.

    Args:
        taxpayer_id: Taxpayer the authorization is being requested for.
        current_user: Authenticated user claims (not read here).
        tenant_id: Tenant the request belongs to.

    Returns:
        A dict with ``authorization_url``, the generated ``state`` and
        ``expires_in`` (600, i.e. 10 minutes).

    Raises:
        HTTPException: 500 when building the URL or storing the state fails.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    with tracer.start_as_current_span("initiate_oauth") as span:
        span.set_attribute("taxpayer_id", taxpayer_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # State parameter ties the later callback to this request.
            state = str(ulid.new())

            base_url = (
                settings.hmrc_sandbox_url
                if settings.use_sandbox
                else settings.hmrc_base_url
            )
            auth_url = f"{base_url}/oauth/authorize"

            params = {
                "response_type": "code",
                "client_id": settings.client_id,
                "scope": "read:self-assessment write:self-assessment",
                "state": state,
                "redirect_uri": settings.redirect_uri,
            }

            # Store state for validation in the callback.
            await _store_oauth_state(state, taxpayer_id, tenant_id)

            # urlencode percent-encodes each value. The scope contains a space
            # and the redirect URI contains reserved characters, so joining the
            # raw values would produce a malformed query string.
            full_auth_url = f"{auth_url}?{urlencode(params)}"

            return {
                "authorization_url": full_auth_url,
                "state": state,
                "expires_in": 600,  # 10 minutes
            }

        except Exception as e:
            logger.error("Failed to initiate OAuth flow", error=str(e))
            raise HTTPException(
                status_code=500, detail="Failed to initiate OAuth flow"
            ) from e
@app.post("/oauth/callback")
async def handle_oauth_callback(
    code: str,
    state: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Complete the OAuth flow: validate state, exchange the code, store tokens.

    The tokens are encrypted through ``vault_helper`` and written to Neo4j as
    an ``HMRCAuthorization`` node, after which the stored state is deleted.

    Args:
        code: Authorization code received from HMRC.
        state: State value issued when the flow was initiated.
        current_user: Authenticated user claims (not read here).
        tenant_id: Tenant the stored state must belong to.

    Returns:
        A dict with ``status`` (``"authorized"``), the taxpayer id from the
        stored state, the granted scope and the token lifetime in seconds.

    Raises:
        HTTPException: 400 for an unknown state or a tenant mismatch; 500 when
            Neo4j or Vault is not initialised or any other step fails.
    """
    # Function-scope import keeps this block self-contained.
    from datetime import timezone

    with tracer.start_as_current_span("handle_oauth_callback") as span:
        span.set_attribute("state", state)
        span.set_attribute("tenant_id", tenant_id)

        if not neo4j_client:
            raise HTTPException(status_code=500, detail="Neo4j client not initialized")

        try:
            # Validate state
            oauth_data = await _get_oauth_state(state)
            if not oauth_data or oauth_data.get("tenant_id") != tenant_id:
                raise HTTPException(status_code=400, detail="Invalid state parameter")

            # Check Vault before the exchange: an authorization code can only
            # be redeemed once, so it must not be spent if the tokens cannot be
            # encrypted afterwards.
            if vault_helper is None:
                raise HTTPException(
                    status_code=500, detail="Vault helper not initialized"
                )

            # Exchange code for access token
            token_data = await _exchange_code_for_token(code)
            expires_in = token_data.get("expires_in", 3600)

            encrypted_access_token = vault_helper.encrypt_field(
                "hmrc-access-token", token_data["access_token"]
            )
            encrypted_refresh_token = vault_helper.encrypt_field(
                "hmrc-refresh-token", token_data.get("refresh_token", "")
            )

            # Take the epoch from an aware UTC datetime. Calling .timestamp()
            # on the naive result of utcnow() treats it as local time, which
            # shifts expires_at by the host's UTC offset.
            issued_at = datetime.now(timezone.utc)
            # Naive UTC value, the same shape datetime.utcnow() produced for
            # the other stored fields.
            recorded_at = issued_at.replace(tzinfo=None)

            auth_properties = {
                "taxpayer_id": oauth_data["taxpayer_id"],
                "tenant_id": tenant_id,
                "access_token": encrypted_access_token,
                "refresh_token": encrypted_refresh_token,
                "expires_at": issued_at.timestamp() + expires_in,
                "scope": token_data.get("scope", ""),
                "authorized_at": recorded_at.isoformat(),
                "source": "oauth_flow",
                "extractor_version": "1.0.0",
                "valid_from": recorded_at,
                "asserted_at": recorded_at,
            }

            await neo4j_client.create_node("HMRCAuthorization", auth_properties)  # pyright: ignore[reportOptionalMemberAccess]

            # Clean up state so it cannot be replayed.
            await _delete_oauth_state(state)

            return {
                "status": "authorized",
                "taxpayer_id": oauth_data["taxpayer_id"],
                "scope": token_data.get("scope", ""),
                "expires_in": expires_in,
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error("OAuth callback failed", error=str(e))
            raise HTTPException(
                status_code=500, detail="OAuth callback failed"
            ) from e
async def _handle_form_filled(topic: str, payload: EventPayload) -> None:
    """React to a form-filled event, optionally triggering a dry-run submission.

    Auto-submission is hard-wired off (``auto_submit = False``), so with the
    current code the handler only validates the event and returns. Every
    exception is logged and swallowed so the event consumer is never
    interrupted.

    Args:
        topic: Topic the event arrived on (not read here).
        payload: Event payload; ``data`` is expected to carry ``form_id``,
            ``tenant_id`` and optionally ``calculation_id``.
    """
    try:
        if not neo4j_client:
            raise Exception("Neo4j client not initialized")

        event = payload.data
        form_id = event.get("form_id")
        tenant_id = event.get("tenant_id")
        calculation_id = event.get("calculation_id")

        if not (form_id and tenant_id):
            logger.warning("Invalid form filled event", data=event)
            return

        # Only auto-submit if configured (this would be a tenant setting).
        auto_submit = False  # Default to false for safety
        if not auto_submit or not calculation_id:
            return

        logger.info(
            "Auto-submitting form to HMRC",
            form_id=form_id,
            calculation_id=calculation_id,
        )

        # Resolve the taxpayer and tax year from the calculation node.
        calc_query = (
            "\n"
            "MATCH (c:Calculation {calculation_id: $calculation_id})\n"
            "WHERE c.retracted_at IS NULL\n"
            "RETURN c.taxpayer_id as taxpayer_id, c.tax_year as tax_year\n"
        )
        rows = await neo4j_client.run_query(  # pyright: ignore[reportOptionalMemberAccess]
            calc_query, {"calculation_id": calculation_id}
        )
        if not rows:
            return

        first_row = rows[0]
        taxpayer_id = first_row["taxpayer_id"]
        tax_year = first_row["tax_year"]

        await _submit_to_hmrc_async(
            tax_year=tax_year,
            taxpayer_id=taxpayer_id,
            dry_run=True,  # Always dry run for auto-submission
            tenant_id=tenant_id,
            submission_id=str(ulid.new()),
            actor=payload.actor,
        )

    except Exception as e:
        logger.error("Failed to handle form filled event", error=str(e))
async def _submit_to_hmrc_async(
    tax_year: str,
    taxpayer_id: str,
    dry_run: bool,
    tenant_id: str,
    submission_id: str,
    actor: str,
) -> None:
    """Run one submission end to end and record its outcome.

    Steps: load taxpayer and calculation data, validate, prepare and submit
    (or simulate for a dry run), store a ``Submission`` record, bump metrics
    and publish an ``HMRC_SUBMITTED`` event. Failures inside those steps are
    logged, recorded as an error submission and counted; they are not
    re-raised.

    Args:
        tax_year: Tax year being submitted.
        taxpayer_id: Taxpayer the submission is for.
        dry_run: When true the HMRC call is simulated.
        tenant_id: Tenant the data belongs to.
        submission_id: Identifier under which the outcome is stored.
        actor: Identity recorded on the published event.

    Raises:
        Exception: If the event bus has not been initialised (checked before
            any work starts, so nothing is recorded in that case).
    """
    with tracer.start_as_current_span("submit_to_hmrc_async") as span:
        span.set_attribute("submission_id", submission_id)
        span.set_attribute("taxpayer_id", taxpayer_id)
        span.set_attribute("dry_run", dry_run)

        if not event_bus:
            raise Exception("Event bus not initialized")

        # Tracks whether a Submission node already exists for this id, so a
        # later failure (metrics, publish) does not add a second "error" node.
        record_stored = False

        try:
            taxpayer_data = await _get_taxpayer_data(taxpayer_id, tenant_id)
            calculation_data = await _get_latest_calculation(
                taxpayer_id, tax_year, tenant_id
            )

            validation_results = await _validate_submission_data(
                taxpayer_data, calculation_data
            )

            if not validation_results.get("valid", False):
                # Data that failed validation is never sent (or simulated).
                # The failure is recorded together with its validation errors.
                hmrc_response: dict[str, Any] = {
                    "status": "validation_failed",
                    "reference": None,
                    "timestamp": datetime.utcnow().isoformat(),
                    "dry_run": dry_run,
                }
            else:
                submission_data = await _prepare_submission_data(
                    taxpayer_data, calculation_data, tax_year
                )

                # Submit to HMRC (or simulate if dry run)
                if dry_run:
                    hmrc_response = await _simulate_hmrc_submission(submission_data)
                else:
                    hmrc_response = await _submit_to_hmrc_api(
                        submission_data, taxpayer_id, tenant_id
                    )

            await _store_submission_record(
                submission_id,
                taxpayer_id,
                tax_year,
                tenant_id,
                hmrc_response,
                validation_results,
                dry_run,
            )
            record_stored = True

            metrics.counter("hmrc_submissions_total").labels(
                tenant_id=tenant_id,
                dry_run=str(dry_run),
                status=hmrc_response.get("status", "unknown"),
            ).inc()

            event_payload = EventPayload(
                data={
                    "submission_id": submission_id,
                    "taxpayer_id": taxpayer_id,
                    "tax_year": tax_year,
                    "tenant_id": tenant_id,
                    "status": hmrc_response.get("status"),
                    "hmrc_reference": hmrc_response.get("reference"),
                    "dry_run": dry_run,
                },
                actor=actor,
                tenant_id=tenant_id,
            )
            await event_bus.publish(EventTopics.HMRC_SUBMITTED, event_payload)  # pyright: ignore[reportOptionalMemberAccess]

            logger.info(
                "HMRC submission completed",
                submission_id=submission_id,
                status=hmrc_response.get("status"),
                dry_run=dry_run,
            )

        except Exception as e:
            logger.error(
                "HMRC submission failed", submission_id=submission_id, error=str(e)
            )

            if not record_stored:
                try:
                    await _store_submission_error(submission_id, str(e), tenant_id)
                except Exception as store_error:
                    # Recording the failure is best effort. If the store is
                    # itself down, the error metric below must still be emitted.
                    logger.error(
                        "Failed to store HMRC submission error",
                        submission_id=submission_id,
                        error=str(store_error),
                    )

            metrics.counter("hmrc_submission_errors_total").labels(
                tenant_id=tenant_id, error_type=type(e).__name__
            ).inc()
async def _get_taxpayer_data(taxpayer_id: str, tenant_id: str) -> dict[str, Any]:
    """Fetch the non-retracted ``TaxpayerProfile`` node for a tenant's taxpayer.

    Args:
        taxpayer_id: Taxpayer to look up.
        tenant_id: Tenant the profile must belong to.

    Returns:
        The ``t`` value of the first row returned by the query.

    Raises:
        Exception: If the Neo4j client is not initialised or no profile matches.
    """
    if not neo4j_client:
        raise Exception("Neo4j client not initialized")

    cypher = (
        "\n"
        "MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})\n"
        "WHERE t.retracted_at IS NULL\n"
        "RETURN t\n"
    )
    bindings = {"taxpayer_id": taxpayer_id, "tenant_id": tenant_id}

    rows = await neo4j_client.run_query(cypher, bindings)
    if rows:
        return rows[0]["t"]

    raise Exception(f"Taxpayer not found: {taxpayer_id}")
async def _get_latest_calculation(
    taxpayer_id: str, tax_year: str, tenant_id: str
) -> dict[str, Any]:
    """Fetch the most recent non-retracted calculation for a taxpayer and year.

    "Most recent" is decided by the query itself, which orders by
    ``calculated_at`` descending and keeps one row.

    Args:
        taxpayer_id: Taxpayer the calculation belongs to.
        tax_year: Tax year of the calculation.
        tenant_id: Tenant the calculation must belong to.

    Returns:
        The ``c`` value of the single row returned by the query.

    Raises:
        Exception: If the Neo4j client is not initialised or nothing matches.
    """
    if not neo4j_client:
        raise Exception("Neo4j client not initialized")

    cypher = (
        "\n"
        "MATCH (c:Calculation {taxpayer_id: $taxpayer_id, tax_year: $tax_year, tenant_id: $tenant_id})\n"
        "WHERE c.retracted_at IS NULL\n"
        "RETURN c\n"
        "ORDER BY c.calculated_at DESC\n"
        "LIMIT 1\n"
    )
    bindings = {
        "taxpayer_id": taxpayer_id,
        "tax_year": tax_year,
        "tenant_id": tenant_id,
    }

    rows = await neo4j_client.run_query(cypher, bindings)  # pyright: ignore[reportOptionalMemberAccess]
    if rows:
        return rows[0]["c"]

    raise Exception(
        f"No calculation found for taxpayer {taxpayer_id} and tax year {tax_year}"
    )
async def _validate_submission_data(
taxpayer_data: dict[str, Any], calculation_data: dict[str, Any]
) -> dict[str, Any]:
"""Validate submission data"""
validation_results: dict[str, bool | list[str]] = {
"valid": True,
"errors": [],
"warnings": [],
}
# Check required taxpayer fields
if not taxpayer_data.get("utr"):
validation_results["errors"].append("UTR is required")
validation_results["valid"] = False
if not taxpayer_data.get("ni_number"):
validation_results["errors"].append("National Insurance number is required")
validation_results["valid"] = False
# Check calculation data
if not calculation_data.get("schedule"):
validation_results["errors"].append("Schedule is required")
validation_results["valid"] = False
return validation_results
async def _prepare_submission_data(
taxpayer_data: dict[str, Any], calculation_data: dict[str, Any], tax_year: str
) -> dict[str, Any]:
"""Prepare data for HMRC submission"""
# This would format data according to HMRC MTD API requirements
submission_data = {
"taxYear": tax_year,
"nino": taxpayer_data.get("ni_number"),
"utr": taxpayer_data.get("utr"),
"schedule": calculation_data.get("schedule"),
"submissionTimestamp": datetime.utcnow().isoformat(),
}
return submission_data
async def _simulate_hmrc_submission(submission_data: dict[str, Any]) -> dict[str, Any]:
    """Produce a fake "accepted" HMRC response for a dry run.

    Args:
        submission_data: Prepared payload (not read here).

    Returns:
        A dict with status ``"accepted"``, a ``DRY_RUN_``-prefixed ULID
        reference, the current ``datetime.utcnow()`` value in ISO format and
        ``dry_run`` set to true.
    """
    # Simulate processing delay.
    await asyncio.sleep(1)

    reference = f"DRY_RUN_{ulid.new()}"
    responded_at = datetime.utcnow().isoformat()

    return {
        "status": "accepted",
        "reference": reference,
        "timestamp": responded_at,
        "dry_run": True,
    }
async def _submit_to_hmrc_api(
    submission_data: dict[str, Any], taxpayer_id: str, tenant_id: str
) -> dict[str, Any]:
    """Stand-in for the real HMRC MTD API call.

    No request is made: a warning is logged and a ``"not_implemented"``
    response is returned.

    Args:
        submission_data: Prepared payload (not read here).
        taxpayer_id: Taxpayer the submission is for (not read here).
        tenant_id: Tenant the submission belongs to (not read here).

    Returns:
        A dict with status ``"not_implemented"``, no reference, the current
        ``datetime.utcnow()`` value in ISO format and an error message.
    """
    logger.warning("Actual HMRC API submission not implemented")

    placeholder: dict[str, Any] = dict(
        status="not_implemented",
        reference=None,
        timestamp=datetime.utcnow().isoformat(),
        error="HMRC API integration not implemented",
    )
    return placeholder
async def _store_submission_record(
    submission_id: str,
    taxpayer_id: str,
    tax_year: str,
    tenant_id: str,
    hmrc_response: dict[str, Any],
    validation_results: dict[str, Any],
    dry_run: bool,
) -> None:
    """Write the outcome of a submission as a ``Submission`` node.

    Args:
        submission_id: Identifier of the submission.
        taxpayer_id: Taxpayer the submission is for.
        tax_year: Tax year submitted.
        tenant_id: Tenant the record belongs to.
        hmrc_response: Response dict; ``status``, ``reference`` and
            ``timestamp`` are copied onto the node.
        validation_results: Validation outcome, stored as a JSON string.
        dry_run: Whether the submission was simulated.

    Raises:
        Exception: If the Neo4j client is not initialised.
    """
    # Fields identifying the submission and mirroring the HMRC response.
    node_properties: dict[str, Any] = {
        "submission_id": submission_id,
        "taxpayer_id": taxpayer_id,
        "tax_year": tax_year,
        "tenant_id": tenant_id,
    }
    node_properties["status"] = hmrc_response.get("status")
    node_properties["hmrc_reference"] = hmrc_response.get("reference")
    node_properties["submission_timestamp"] = hmrc_response.get("timestamp")
    node_properties["validation_results"] = json.dumps(validation_results)
    node_properties["dry_run"] = dry_run

    # Provenance fields written on every node created by this service.
    node_properties["source"] = "hmrc_service"
    node_properties["extractor_version"] = "1.0.0"
    node_properties["valid_from"] = datetime.utcnow()
    node_properties["asserted_at"] = datetime.utcnow()

    if not neo4j_client:
        raise Exception("Neo4j client not initialized")

    await neo4j_client.create_node("Submission", node_properties)  # pyright: ignore[reportOptionalMemberAccess]
async def _store_submission_error(
    submission_id: str, error_message: str, tenant_id: str
) -> None:
    """Write a failed submission as a ``Submission`` node with status ``"error"``.

    Args:
        submission_id: Identifier of the failed submission.
        error_message: Text stored in the node's ``error_message`` property.
        tenant_id: Tenant the record belongs to.

    Raises:
        Exception: If the Neo4j client is not initialised.
    """
    failure_node: dict[str, Any] = dict(
        submission_id=submission_id,
        tenant_id=tenant_id,
        status="error",
        error_message=error_message,
        submission_timestamp=datetime.utcnow().isoformat(),
        # Provenance fields written on every node created by this service.
        source="hmrc_service",
        extractor_version="1.0.0",
        valid_from=datetime.utcnow(),
        asserted_at=datetime.utcnow(),
    )

    if not neo4j_client:
        raise Exception("Neo4j client not initialized")

    await neo4j_client.create_node("Submission", failure_node)  # pyright: ignore[reportOptionalMemberAccess]
async def _store_oauth_state(state: str, taxpayer_id: str, tenant_id: str) -> None:
    """Placeholder for persisting the OAuth state; currently only logs it.

    Args:
        state: State value issued for the authorization request.
        taxpayer_id: Taxpayer the authorization is for.
        tenant_id: Tenant the request belongs to (not logged or stored here).
    """
    # Nothing is persisted yet; temporary storage (Redis or similar) is still
    # to be added, so the callback cannot look this value up.
    logged_fields = {"state": state, "taxpayer_id": taxpayer_id}
    logger.debug("OAuth state stored", **logged_fields)
async def _get_oauth_state(state: str) -> dict[str, Any] | None:
"""Get OAuth state"""
# This would retrieve from Redis
# For now, return mock data
return {"taxpayer_id": "test_taxpayer", "tenant_id": "test_tenant"}
async def _delete_oauth_state(state: str) -> None:
    """Placeholder for removing stored OAuth state; currently only logs it.

    Args:
        state: State value that would be deleted.
    """
    # Nothing is deleted yet; removal from Redis is still to be added.
    logged_fields = {"state": state}
    logger.debug("OAuth state deleted", **logged_fields)
async def _exchange_code_for_token(code: str) -> dict[str, Any]:
"""Exchange authorization code for access token"""
# This would call HMRC token endpoint
# For now, return mock token
return {
"access_token": "mock_access_token",
"refresh_token": "mock_refresh_token",
"expires_in": 3600,
"scope": "read:self-assessment write:self-assessment",
}
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Render an ``HTTPException`` as an RFC 7807 style JSON body.

    Args:
        request: Request being handled; its URL becomes ``instance`` and
            ``request.state.trace_id`` (when present) becomes ``trace_id``.
        exc: The exception raised by a handler or dependency.

    Returns:
        A ``JSONResponse`` with the exception's status code, the serialised
        ``ErrorResponse`` as content and the exception's headers, if any.
    """
    problem = ErrorResponse(
        type=f"https://httpstatuses.com/{exc.status_code}",
        title=exc.detail,
        status=exc.status_code,
        detail=exc.detail,
        instance=str(request.url),
        trace_id=getattr(request.state, "trace_id", None),
    )
    return JSONResponse(
        status_code=exc.status_code,
        content=problem.model_dump(),
        # Forward headers attached to the exception (e.g. WWW-Authenticate on a
        # 401), which this custom handler would otherwise drop.
        headers=getattr(exc, "headers", None),
    )
if __name__ == "__main__":
    import uvicorn

    # Direct-run entry point: serve this module's app on all interfaces, port
    # 8010, with auto-reload on and uvicorn's own log configuration disabled.
    server_options = {
        "host": "0.0.0.0",
        "port": 8010,
        "reload": True,
        "log_config": None,
    }
    uvicorn.run("main:app", **server_options)