# FILE: apps/svc-hmrc/main.py
# HMRC submission service with MTD API integration and validation

import asyncio
import json
import os
import sys
from datetime import datetime, timezone
from typing import Any
from urllib.parse import urlencode

import structlog
import ulid
from fastapi import BackgroundTasks, Depends, HTTPException, Request
from fastapi.responses import JSONResponse

# Import shared libraries: the repository root must be importable before the
# `libs.*` imports below are resolved.
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))

from libs.app_factory import create_app
from libs.config import (
    BaseAppSettings,
    create_event_bus,
    create_neo4j_client,
    create_vault_client,
)
from libs.events import EventBus, EventPayload, EventTopics
from libs.neo import Neo4jClient
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.schemas import ErrorResponse, HMRCSubmissionRequest, HMRCSubmissionResponse
from libs.security import VaultTransitHelper, get_current_user, get_tenant_id

logger = structlog.get_logger()


class HMRCSettings(BaseAppSettings):
    """Settings for HMRC service"""

    service_name: str = "svc-hmrc"

    # HMRC API configuration
    hmrc_base_url: str = "https://api.service.hmrc.gov.uk"
    hmrc_sandbox_url: str = "https://test-api.service.hmrc.gov.uk"
    use_sandbox: bool = True

    # OAuth configuration
    client_id: str = ""
    client_secret: str = ""
    redirect_uri: str = "http://localhost:8000/oauth/callback"

    # API endpoints
    mtd_income_tax_endpoint: str = (
        "/income-tax/self-assessment/ni/{nino}/uk-property/{taxYear}"
    )
    mtd_self_employment_endpoint: str = (
        "/income-tax/self-assessment/ni/{nino}/self-employment/{businessId}"
    )

    # Validation
    max_submission_retries: int = 3
    submission_timeout: int = 300  # 5 minutes


# Create app and settings
app, settings = create_app(
    service_name="svc-hmrc",
    title="Tax Agent HMRC Service",
    description="HMRC submission service with MTD API integration",
    settings_class=HMRCSettings,
)

# Global clients (populated by startup_event)
vault_helper: VaultTransitHelper | None = None
neo4j_client: Neo4jClient | None = None
event_bus: EventBus | None = None
tracer = get_tracer("svc-hmrc")
metrics = get_metrics()


def _require_neo4j() -> Neo4jClient:
    """Return the module-level Neo4j client.

    Raises:
        RuntimeError: if ``startup_event`` has not initialised the client.
    """
    if not neo4j_client:
        raise RuntimeError("Neo4j client not initialized")
    return neo4j_client


@app.on_event("startup")
async def startup_event() -> None:
    """Initialize service dependencies"""
    global vault_helper, neo4j_client, event_bus

    logger.info("Starting HMRC service")

    # Setup observability
    setup_observability(settings)

    # Initialize Vault helper
    vault_client = create_vault_client(settings)
    vault_helper = VaultTransitHelper(vault_client, "tax-agent-transit")

    # Initialize Neo4j client
    neo4j_driver = create_neo4j_client(settings)
    neo4j_client = Neo4jClient(neo4j_driver)

    # Initialize event bus
    event_bus = create_event_bus(settings)
    if not event_bus:
        raise RuntimeError("Event bus not initialized")

    await event_bus.start()

    # Subscribe to form completion events
    await event_bus.subscribe(EventTopics.FORM_FILLED, _handle_form_filled)  # type: ignore

    logger.info("HMRC service started successfully")


@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Cleanup service dependencies"""
    global neo4j_client, event_bus

    logger.info("Shutting down HMRC service")

    if neo4j_client:
        await neo4j_client.close()

    if event_bus:
        await event_bus.stop()

    logger.info("HMRC service shutdown complete")


@app.get("/health")
async def health_check() -> dict[str, Any]:
    """Health check endpoint"""
    return {
        "status": "healthy",
        "service": settings.service_name,
        "version": settings.service_version,
        "timestamp": datetime.utcnow().isoformat(),
        "hmrc_environment": "sandbox" if settings.use_sandbox else "production",
    }


@app.post("/submit", response_model=HMRCSubmissionResponse)
async def submit_to_hmrc(
    request_data: HMRCSubmissionRequest,
    background_tasks: BackgroundTasks,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> HMRCSubmissionResponse:
    """Submit tax return to HMRC

    Generates a submission ID, schedules ``_submit_to_hmrc_async`` as a
    background task and immediately returns a response with status
    ``"processing"``.

    Raises:
        HTTPException: 500 if the submission could not be scheduled.
    """
    with tracer.start_as_current_span("submit_to_hmrc") as span:
        span.set_attribute("tax_year", request_data.tax_year)
        span.set_attribute("taxpayer_id", request_data.taxpayer_id)
        span.set_attribute("tenant_id", tenant_id)
        span.set_attribute("dry_run", request_data.dry_run)

        try:
            # Generate submission ID
            submission_id = str(ulid.new())
            span.set_attribute("submission_id", submission_id)

            # Start background submission
            background_tasks.add_task(
                _submit_to_hmrc_async,
                request_data.tax_year,
                request_data.taxpayer_id,
                request_data.dry_run,
                tenant_id,
                submission_id,
                current_user.get("sub", "system"),
            )

            logger.info(
                "HMRC submission started",
                submission_id=submission_id,
                taxpayer_id=request_data.taxpayer_id,
                dry_run=request_data.dry_run,
            )

            return HMRCSubmissionResponse(
                submission_id=submission_id,
                status="processing",
                hmrc_reference=None,
                submission_timestamp=datetime.utcnow(),
                validation_results={},
                dry_run=request_data.dry_run,
            )

        except Exception as e:
            logger.error("Failed to start HMRC submission", error=str(e))
            raise HTTPException(
                status_code=500, detail="Failed to start HMRC submission"
            ) from e


@app.get("/submissions/{submission_id}")
async def get_submission_status(
    submission_id: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Get submission status

    Looks up the non-retracted ``Submission`` node for this submission ID and
    tenant and returns its status fields.

    Raises:
        HTTPException: 404 if no matching submission exists, 500 on any other
            failure.
    """
    with tracer.start_as_current_span("get_submission_status") as span:
        span.set_attribute("submission_id", submission_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Get submission from Neo4j
            query = """
            MATCH (s:Submission {submission_id: $submission_id, tenant_id: $tenant_id})
            WHERE s.retracted_at IS NULL
            RETURN s
            """

            results = await _require_neo4j().run_query(
                query, {"submission_id": submission_id, "tenant_id": tenant_id}
            )

            if not results:
                raise HTTPException(status_code=404, detail="Submission not found")

            submission = results[0]["s"]

            # Error records are stored without validation_results; treat a
            # missing or null value as an empty JSON object instead of
            # letting json.loads fail on None.
            raw_validation = submission.get("validation_results") or "{}"

            return {
                "submission_id": submission_id,
                "status": submission.get("status"),
                "hmrc_reference": submission.get("hmrc_reference"),
                "submission_timestamp": submission.get("submission_timestamp"),
                "validation_results": json.loads(raw_validation),
                "dry_run": submission.get("dry_run", False),
                "error_message": submission.get("error_message"),
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error(
                "Failed to get submission status",
                submission_id=submission_id,
                error=str(e),
            )
            raise HTTPException(
                status_code=500, detail="Failed to get submission status"
            ) from e


@app.post("/oauth/authorize")
async def initiate_oauth_flow(
    taxpayer_id: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Initiate OAuth flow for HMRC authorization

    Returns the authorization URL (sandbox or production, depending on
    ``settings.use_sandbox``) together with the generated ``state`` value.

    Raises:
        HTTPException: 500 if the flow could not be initiated.
    """
    with tracer.start_as_current_span("initiate_oauth") as span:
        span.set_attribute("taxpayer_id", taxpayer_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Generate state parameter for security
            state = str(ulid.new())

            # Build authorization URL
            base_url = (
                settings.hmrc_sandbox_url
                if settings.use_sandbox
                else settings.hmrc_base_url
            )
            auth_url = f"{base_url}/oauth/authorize"

            params = {
                "response_type": "code",
                "client_id": settings.client_id,
                "scope": "read:self-assessment write:self-assessment",
                "state": state,
                "redirect_uri": settings.redirect_uri,
            }

            # Store state for validation
            await _store_oauth_state(state, taxpayer_id, tenant_id)

            # Build full URL. The values contain spaces (scope) and reserved
            # characters (redirect_uri), so they must be percent-encoded
            # rather than concatenated verbatim.
            full_auth_url = f"{auth_url}?{urlencode(params)}"

            return {
                "authorization_url": full_auth_url,
                "state": state,
                "expires_in": 600,  # 10 minutes
            }

        except Exception as e:
            logger.error("Failed to initiate OAuth flow", error=str(e))
            raise HTTPException(
                status_code=500, detail="Failed to initiate OAuth flow"
            ) from e


@app.post("/oauth/callback")
async def handle_oauth_callback(
    code: str,
    state: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Handle OAuth callback from HMRC

    Validates ``state``, exchanges ``code`` for tokens, encrypts the tokens
    through the Vault helper and stores an ``HMRCAuthorization`` node.

    Raises:
        HTTPException: 400 if the state is unknown or belongs to another
            tenant; 500 if a dependency is not initialised or any other step
            fails.
    """
    with tracer.start_as_current_span("handle_oauth_callback") as span:
        span.set_attribute("state", state)
        span.set_attribute("tenant_id", tenant_id)

        # Check both dependencies up front so the authorization code is not
        # exchanged when the resulting tokens could not be stored anyway.
        if not neo4j_client:
            raise HTTPException(status_code=500, detail="Neo4j client not initialized")
        if vault_helper is None:
            raise HTTPException(status_code=500, detail="Vault helper not initialized")

        try:
            # Validate state
            oauth_data = await _get_oauth_state(state)
            if not oauth_data or oauth_data.get("tenant_id") != tenant_id:
                raise HTTPException(status_code=400, detail="Invalid state parameter")

            # Exchange code for access token
            token_data = await _exchange_code_for_token(code)

            # Store encrypted tokens
            encrypted_access_token = vault_helper.encrypt_field(
                "hmrc-access-token", token_data["access_token"]
            )
            encrypted_refresh_token = vault_helper.encrypt_field(
                "hmrc-refresh-token", token_data.get("refresh_token", "")
            )

            # A naive utcnow().timestamp() is interpreted as local time and
            # yields a skewed epoch value on non-UTC hosts; use an aware
            # datetime for the expiry calculation.
            expires_at = datetime.now(timezone.utc).timestamp() + token_data.get(
                "expires_in", 3600
            )

            # Store authorization in Neo4j
            auth_properties = {
                "taxpayer_id": oauth_data["taxpayer_id"],
                "tenant_id": tenant_id,
                "access_token": encrypted_access_token,
                "refresh_token": encrypted_refresh_token,
                "expires_at": expires_at,
                "scope": token_data.get("scope", ""),
                "authorized_at": datetime.utcnow().isoformat(),
                "source": "oauth_flow",
                "extractor_version": "1.0.0",
                "valid_from": datetime.utcnow(),
                "asserted_at": datetime.utcnow(),
            }

            await neo4j_client.create_node("HMRCAuthorization", auth_properties)

            # Clean up state
            await _delete_oauth_state(state)

            return {
                "status": "authorized",
                "taxpayer_id": oauth_data["taxpayer_id"],
                "scope": token_data.get("scope", ""),
                "expires_in": token_data.get("expires_in", 3600),
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error("OAuth callback failed", error=str(e))
            raise HTTPException(status_code=500, detail="OAuth callback failed") from e


async def _handle_form_filled(topic: str, payload: EventPayload) -> None:
    """Handle form completion events for auto-submission

    Failures are logged and swallowed so that an exception never propagates
    out of the event handler.
    """
    try:
        client = _require_neo4j()

        data = payload.data
        form_id = data.get("form_id")
        tenant_id = data.get("tenant_id")
        calculation_id = data.get("calculation_id")

        if not form_id or not tenant_id:
            logger.warning("Invalid form filled event", data=data)
            return

        # Only auto-submit if configured (this would be a tenant setting)
        auto_submit = False  # Default to false for safety

        if auto_submit and calculation_id:
            logger.info(
                "Auto-submitting form to HMRC",
                form_id=form_id,
                calculation_id=calculation_id,
            )

            # Get taxpayer ID from calculation. The lookup is scoped to the
            # event's tenant, matching the other Calculation queries in this
            # module.
            calc_query = """
            MATCH (c:Calculation {calculation_id: $calculation_id, tenant_id: $tenant_id})
            WHERE c.retracted_at IS NULL
            RETURN c.taxpayer_id as taxpayer_id, c.tax_year as tax_year
            """

            calc_results = await client.run_query(
                calc_query,
                {"calculation_id": calculation_id, "tenant_id": tenant_id},
            )

            if calc_results:
                taxpayer_id = calc_results[0]["taxpayer_id"]
                tax_year = calc_results[0]["tax_year"]

                await _submit_to_hmrc_async(
                    tax_year=tax_year,
                    taxpayer_id=taxpayer_id,
                    dry_run=True,  # Always dry run for auto-submission
                    tenant_id=tenant_id,
                    submission_id=str(ulid.new()),
                    actor=payload.actor,
                )

    except Exception as e:
        logger.error("Failed to handle form filled event", error=str(e))


async def _submit_to_hmrc_async(
    tax_year: str,
    taxpayer_id: str,
    dry_run: bool,
    tenant_id: str,
    submission_id: str,
    actor: str,
) -> None:
    """Submit to HMRC asynchronously

    Loads taxpayer and calculation data, validates it, submits (or simulates
    the submission when ``dry_run`` is true), stores a ``Submission`` record,
    updates metrics and publishes ``EventTopics.HMRC_SUBMITTED``.

    When validation fails, nothing is sent to HMRC (real or simulated); the
    stored record and the published event carry status ``"validation_failed"``.

    Any failure inside the pipeline is logged, stored as an error record and
    counted; it is not re-raised.

    Raises:
        RuntimeError: if the event bus has not been initialised.
    """
    with tracer.start_as_current_span("submit_to_hmrc_async") as span:
        span.set_attribute("submission_id", submission_id)
        span.set_attribute("taxpayer_id", taxpayer_id)
        span.set_attribute("dry_run", dry_run)

        if not event_bus:
            raise RuntimeError("Event bus not initialized")

        try:
            # Get taxpayer data
            taxpayer_data = await _get_taxpayer_data(taxpayer_id, tenant_id)

            # Get calculation data
            calculation_data = await _get_latest_calculation(
                taxpayer_id, tax_year, tenant_id
            )

            # Validate data
            validation_results = await _validate_submission_data(
                taxpayer_data, calculation_data
            )

            if not validation_results.get("valid", False):
                # Never forward data that failed validation; record the
                # outcome so it is visible through the status endpoint.
                logger.warning(
                    "HMRC submission blocked by validation",
                    submission_id=submission_id,
                    errors=validation_results.get("errors"),
                )
                hmrc_response: dict[str, Any] = {
                    "status": "validation_failed",
                    "reference": None,
                    "timestamp": datetime.utcnow().isoformat(),
                    "error": "Submission data failed validation",
                }
            else:
                # Prepare submission
                submission_data = await _prepare_submission_data(
                    taxpayer_data, calculation_data, tax_year
                )

                # Submit to HMRC (or simulate if dry run)
                if dry_run:
                    hmrc_response = await _simulate_hmrc_submission(submission_data)
                else:
                    hmrc_response = await _submit_to_hmrc_api(
                        submission_data, taxpayer_id, tenant_id
                    )

            # Store submission record
            await _store_submission_record(
                submission_id,
                taxpayer_id,
                tax_year,
                tenant_id,
                hmrc_response,
                validation_results,
                dry_run,
            )

            # Update metrics
            metrics.counter("hmrc_submissions_total").labels(
                tenant_id=tenant_id,
                dry_run=str(dry_run),
                status=hmrc_response.get("status", "unknown"),
            ).inc()

            # Publish completion event
            event_payload = EventPayload(
                data={
                    "submission_id": submission_id,
                    "taxpayer_id": taxpayer_id,
                    "tax_year": tax_year,
                    "tenant_id": tenant_id,
                    "status": hmrc_response.get("status"),
                    "hmrc_reference": hmrc_response.get("reference"),
                    "dry_run": dry_run,
                },
                actor=actor,
                tenant_id=tenant_id,
            )

            await event_bus.publish(EventTopics.HMRC_SUBMITTED, event_payload)

            logger.info(
                "HMRC submission completed",
                submission_id=submission_id,
                status=hmrc_response.get("status"),
                dry_run=dry_run,
            )

        except Exception as e:
            logger.error(
                "HMRC submission failed", submission_id=submission_id, error=str(e)
            )

            # Store error record. A failure here (e.g. the graph store being
            # unavailable) must not escape the handler or skip the metric.
            # NOTE(review): if the failure happened after the submission
            # record was stored, this adds a second Submission node with the
            # same submission_id - confirm whether that is intended.
            try:
                await _store_submission_error(submission_id, str(e), tenant_id)
            except Exception as store_error:
                logger.error(
                    "Failed to store HMRC submission error",
                    submission_id=submission_id,
                    error=str(store_error),
                )

            # Update error metrics
            metrics.counter("hmrc_submission_errors_total").labels(
                tenant_id=tenant_id, error_type=type(e).__name__
            ).inc()


async def _get_taxpayer_data(taxpayer_id: str, tenant_id: str) -> dict[str, Any]:
    """Get taxpayer data from knowledge graph

    Raises:
        RuntimeError: if the Neo4j client has not been initialised.
        LookupError: if no non-retracted profile matches.
    """
    query = """
    MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})
    WHERE t.retracted_at IS NULL
    RETURN t
    """

    results = await _require_neo4j().run_query(
        query, {"taxpayer_id": taxpayer_id, "tenant_id": tenant_id}
    )

    if not results:
        raise LookupError(f"Taxpayer not found: {taxpayer_id}")

    return results[0]["t"]


async def _get_latest_calculation(
    taxpayer_id: str, tax_year: str, tenant_id: str
) -> dict[str, Any]:
    """Get latest calculation for taxpayer and tax year

    Raises:
        RuntimeError: if the Neo4j client has not been initialised.
        LookupError: if no non-retracted calculation matches.
    """
    query = """
    MATCH (c:Calculation {taxpayer_id: $taxpayer_id, tax_year: $tax_year, tenant_id: $tenant_id})
    WHERE c.retracted_at IS NULL
    RETURN c
    ORDER BY c.calculated_at DESC
    LIMIT 1
    """

    results = await _require_neo4j().run_query(
        query,
        {"taxpayer_id": taxpayer_id, "tax_year": tax_year, "tenant_id": tenant_id},
    )

    if not results:
        raise LookupError(
            f"No calculation found for taxpayer {taxpayer_id} and tax year {tax_year}"
        )

    return results[0]["c"]


async def _validate_submission_data(
    taxpayer_data: dict[str, Any], calculation_data: dict[str, Any]
) -> dict[str, Any]:
    """Validate submission data

    Returns:
        A dict with ``valid`` (bool), ``errors`` (list of messages) and
        ``warnings`` (list of messages, currently always empty).
    """
    errors: list[str] = []
    warnings: list[str] = []

    # Check required taxpayer fields
    if not taxpayer_data.get("utr"):
        errors.append("UTR is required")

    if not taxpayer_data.get("ni_number"):
        errors.append("National Insurance number is required")

    # Check calculation data
    if not calculation_data.get("schedule"):
        errors.append("Schedule is required")

    return {"valid": not errors, "errors": errors, "warnings": warnings}


async def _prepare_submission_data(
    taxpayer_data: dict[str, Any], calculation_data: dict[str, Any], tax_year: str
) -> dict[str, Any]:
    """Prepare data for HMRC submission"""
    # This would format data according to HMRC MTD API requirements
    return {
        "taxYear": tax_year,
        "nino": taxpayer_data.get("ni_number"),
        "utr": taxpayer_data.get("utr"),
        "schedule": calculation_data.get("schedule"),
        "submissionTimestamp": datetime.utcnow().isoformat(),
    }


async def _simulate_hmrc_submission(submission_data: dict[str, Any]) -> dict[str, Any]:
    """Simulate HMRC submission for dry run"""
    # Simulate processing delay
    await asyncio.sleep(1)

    return {
        "status": "accepted",
        "reference": f"DRY_RUN_{ulid.new()}",
        "timestamp": datetime.utcnow().isoformat(),
        "dry_run": True,
    }


async def _submit_to_hmrc_api(
    submission_data: dict[str, Any], taxpayer_id: str, tenant_id: str
) -> dict[str, Any]:
    """Submit to actual HMRC API"""
    # This would implement the actual HMRC MTD API calls
    # For now, return mock response
    logger.warning("Actual HMRC API submission not implemented")

    return {
        "status": "not_implemented",
        "reference": None,
        "timestamp": datetime.utcnow().isoformat(),
        "error": "HMRC API integration not implemented",
    }


async def _store_submission_record(
    submission_id: str,
    taxpayer_id: str,
    tax_year: str,
    tenant_id: str,
    hmrc_response: dict[str, Any],
    validation_results: dict[str, Any],
    dry_run: bool,
) -> None:
    """Store submission record in knowledge graph

    Raises:
        RuntimeError: if the Neo4j client has not been initialised.
    """
    submission_properties = {
        "submission_id": submission_id,
        "taxpayer_id": taxpayer_id,
        "tax_year": tax_year,
        "tenant_id": tenant_id,
        "status": hmrc_response.get("status"),
        "hmrc_reference": hmrc_response.get("reference"),
        "submission_timestamp": hmrc_response.get("timestamp"),
        # Serialised to a JSON string; get_submission_status decodes it.
        "validation_results": json.dumps(validation_results),
        "dry_run": dry_run,
        "source": "hmrc_service",
        "extractor_version": "1.0.0",
        "valid_from": datetime.utcnow(),
        "asserted_at": datetime.utcnow(),
    }

    await _require_neo4j().create_node("Submission", submission_properties)


async def _store_submission_error(
    submission_id: str, error_message: str, tenant_id: str
) -> None:
    """Store submission error

    Raises:
        RuntimeError: if the Neo4j client has not been initialised.
    """
    error_properties = {
        "submission_id": submission_id,
        "tenant_id": tenant_id,
        "status": "error",
        "error_message": error_message,
        "submission_timestamp": datetime.utcnow().isoformat(),
        "source": "hmrc_service",
        "extractor_version": "1.0.0",
        "valid_from": datetime.utcnow(),
        "asserted_at": datetime.utcnow(),
    }

    await _require_neo4j().create_node("Submission", error_properties)


async def _store_oauth_state(state: str, taxpayer_id: str, tenant_id: str) -> None:
    """Store OAuth state temporarily"""
    # This would use Redis or similar for temporary storage
    # For now, just log
    logger.debug("OAuth state stored", state=state, taxpayer_id=taxpayer_id)


async def _get_oauth_state(state: str) -> dict[str, Any] | None:
    """Get OAuth state"""
    # This would retrieve from Redis
    # For now, return mock data
    # NOTE(review): while this is a stub, the callback's state check only
    # passes for tenant "test_tenant" and does not actually verify `state`.
    return {"taxpayer_id": "test_taxpayer", "tenant_id": "test_tenant"}


async def _delete_oauth_state(state: str) -> None:
    """Delete OAuth state"""
    # This would delete from Redis
    logger.debug("OAuth state deleted", state=state)


async def _exchange_code_for_token(code: str) -> dict[str, Any]:
    """Exchange authorization code for access token"""
    # This would call HMRC token endpoint
    # For now, return mock token
    return {
        "access_token": "mock_access_token",
        "refresh_token": "mock_refresh_token",
        "expires_in": 3600,
        "scope": "read:self-assessment write:self-assessment",
    }


@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Handle HTTP exceptions with RFC7807 format"""
    return JSONResponse(
        status_code=exc.status_code,
        content=ErrorResponse(
            type=f"https://httpstatuses.com/{exc.status_code}",
            title=exc.detail,
            status=exc.status_code,
            detail=exc.detail,
            instance=str(request.url),
            trace_id=getattr(request.state, "trace_id", None),
        ).model_dump(),
    )


if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main:app", host="0.0.0.0", port=8010, reload=True, log_config=None)