"""PDF form filling with evidence pack generation.""" # FILE: apps/svc-forms/main.py # pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement # pylint: disable=global-variable-not-assigned,raise-missing-from,unused-argument # pylint: disable=broad-exception-caught,no-else-return,too-many-arguments,too-many-positional-arguments # pylint: disable=too-many-locals,import-outside-toplevel # mypy: disable-error-code=union-attr import os # Import shared libraries import sys from datetime import datetime from io import BytesIO from typing import Any import structlog import ulid from fastapi import BackgroundTasks, Depends, HTTPException, Request from fastapi.responses import JSONResponse, Response sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) from libs.app_factory import create_app from libs.config import ( BaseAppSettings, create_event_bus, create_minio_client, create_neo4j_client, ) from libs.events import EventBus, EventPayload, EventTopics from libs.forms import UK_TAX_FORMS, EvidencePackGenerator, PDFFormFiller from libs.neo import Neo4jClient from libs.observability import get_metrics, get_tracer, setup_observability from libs.schemas import ErrorResponse from libs.security import get_current_user, get_tenant_id from libs.storage import DocumentStorage, StorageClient logger = structlog.get_logger() class FormsSettings(BaseAppSettings): """Settings for forms service""" service_name: str = "svc-forms" # Form templates forms_template_dir: str = "forms/templates" output_bucket: str = "filled-forms" evidence_packs_bucket: str = "evidence-packs" # Supported forms supported_forms: list[str] = ["SA100", "SA103", "SA105", "SA106"] # PDF configuration pdf_quality: str = "high" flatten_forms: bool = True # Create app and settings app, settings = create_app( service_name="svc-forms", title="Tax Agent Forms Service", description="PDF form filling and evidence pack generation", settings_class=FormsSettings, ) # Global clients storage_client: StorageClient | None = None document_storage: DocumentStorage | None = None neo4j_client: Neo4jClient | None = None pdf_form_filler: PDFFormFiller | None = None evidence_pack_generator: EvidencePackGenerator | None = None event_bus: EventBus | None = None tracer = get_tracer("svc-forms") metrics = get_metrics() @app.on_event("startup") async def startup_event() -> None: """Initialize service dependencies""" global storage_client, document_storage, neo4j_client, pdf_form_filler # pylint: disable=line-too-long global evidence_pack_generator, event_bus logger.info("Starting forms service") # Setup observability setup_observability(settings) # Initialize MinIO client minio_client = create_minio_client(settings) storage_client = StorageClient(minio_client) document_storage = DocumentStorage(storage_client) # Initialize Neo4j client neo4j_driver = create_neo4j_client(settings) neo4j_client = Neo4jClient(neo4j_driver) # Initialize PDF form filler pdf_form_filler = PDFFormFiller() # Load form templates for form_id in settings.supported_forms: template_path = os.path.join(settings.forms_template_dir, f"{form_id}.pdf") if os.path.exists(template_path): pdf_form_filler.load_template(form_id, template_path) else: logger.warning( "Form template not found", form_id=form_id, path=template_path ) # Initialize evidence pack generator evidence_pack_generator = EvidencePackGenerator(storage_client) # Initialize event bus event_bus = create_event_bus(settings) await event_bus.start() # fmt: skip # pyright: ignore[reportOptionalMemberAccess] # Subscribe to calculation completion events await event_bus.subscribe( # fmt: skip # pyright: ignore[reportOptionalMemberAccess] EventTopics.CALC_SCHEDULE_READY, _handle_calculation_ready ) # Ensure buckets exist await storage_client.ensure_bucket(settings.output_bucket) await storage_client.ensure_bucket(settings.evidence_packs_bucket) logger.info("Forms service started successfully") @app.on_event("shutdown") async def shutdown_event() -> None: """Cleanup service dependencies""" global neo4j_client, event_bus logger.info("Shutting down forms service") if neo4j_client: await neo4j_client.close() if event_bus: await event_bus.stop() logger.info("Forms service shutdown complete") @app.get("/health") async def health_check() -> dict[str, Any]: """Health check endpoint""" return { "status": "healthy", "service": settings.service_name, "version": "1.0.0", "timestamp": datetime.now().isoformat(), "supported_forms": settings.supported_forms, } @app.post("/fill/{form_id}") async def fill_form( form_id: str, field_values: dict[str, Any], background_tasks: BackgroundTasks, current_user: dict[str, Any] = Depends(get_current_user), tenant_id: str = Depends(get_tenant_id), ) -> dict[str, Any]: """Fill PDF form with provided values""" with tracer.start_as_current_span("fill_form") as span: span.set_attribute("form_id", form_id) span.set_attribute("tenant_id", tenant_id) span.set_attribute("field_count", len(field_values)) try: # Validate form ID if form_id not in settings.supported_forms: raise HTTPException( status_code=400, detail=f"Unsupported form: {form_id}" ) # Generate filling ID filling_id = str(ulid.new()) span.set_attribute("filling_id", filling_id) # Start background form filling background_tasks.add_task( _fill_form_async, form_id, field_values, tenant_id, filling_id, current_user.get("sub", "system"), ) logger.info("Form filling started", form_id=form_id, filling_id=filling_id) return { "filling_id": filling_id, "form_id": form_id, "status": "filling", "field_count": len(field_values), } except HTTPException: raise except Exception as e: logger.error("Failed to start form filling", form_id=form_id, error=str(e)) raise HTTPException(status_code=500, detail="Failed to start form filling") @app.post("/fill-from-calculation/{calculation_id}") async def fill_form_from_calculation( calculation_id: str, background_tasks: BackgroundTasks, current_user: dict[str, Any] = Depends(get_current_user), tenant_id: str = Depends(get_tenant_id), ) -> dict[str, Any]: """Fill form using calculation results""" with tracer.start_as_current_span("fill_form_from_calculation") as span: span.set_attribute("calculation_id", calculation_id) span.set_attribute("tenant_id", tenant_id) try: # Get calculation from Neo4j calc_query = """ MATCH (c:Calculation {calculation_id: $calculation_id, tenant_id: $tenant_id}) WHERE c.retracted_at IS NULL RETURN c """ calc_results = await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] calc_query, {"calculation_id": calculation_id, "tenant_id": tenant_id} ) if not calc_results: raise HTTPException(status_code=404, detail="Calculation not found") calculation = calc_results[0]["c"] form_id = calculation.get("schedule") if not form_id: raise HTTPException( status_code=400, detail="No schedule found in calculation" ) # Get form boxes boxes_query = """ MATCH (c:Calculation {calculation_id: $calculation_id})-[:HAS_BOX]->(b:FormBox) WHERE c.retracted_at IS NULL AND b.retracted_at IS NULL RETURN b """ box_results = await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] boxes_query, {"calculation_id": calculation_id} ) # Convert form boxes to field values field_values = {} for box_result in box_results: box = box_result["b"] field_values[f"box_{box['box']}"] = box["value"] # Generate filling ID filling_id = str(ulid.new()) span.set_attribute("filling_id", filling_id) span.set_attribute("form_id", form_id) # Start background form filling background_tasks.add_task( _fill_form_async, form_id, field_values, tenant_id, filling_id, current_user.get("sub", "system"), calculation_id, ) logger.info( "Form filling from calculation started", form_id=form_id, filling_id=filling_id, calculation_id=calculation_id, ) return { "filling_id": filling_id, "form_id": form_id, "calculation_id": calculation_id, "status": "filling", "field_count": len(field_values), } except HTTPException: raise except Exception as e: logger.error( "Failed to fill form from calculation", calculation_id=calculation_id, error=str(e), ) raise HTTPException( status_code=500, detail="Failed to fill form from calculation" ) @app.get("/download/{filling_id}") async def download_filled_form( filling_id: str, current_user: dict[str, Any] = Depends(get_current_user), tenant_id: str = Depends(get_tenant_id), ) -> Response: """Download filled form""" with tracer.start_as_current_span("download_filled_form") as span: span.set_attribute("filling_id", filling_id) span.set_attribute("tenant_id", tenant_id) try: # Get filled form from storage object_key = f"tenants/{tenant_id}/filled/{filling_id}.pdf" form_content = await storage_client.get_object( # pyright: ignore[reportOptionalMemberAccess] settings.output_bucket, object_key ) if not form_content: raise HTTPException(status_code=404, detail="Filled form not found") return Response( content=form_content, media_type="application/pdf", headers={ "Content-Disposition": f"attachment; filename={filling_id}.pdf" }, ) except HTTPException: raise except Exception as e: logger.error( "Failed to download filled form", filling_id=filling_id, error=str(e) ) raise HTTPException( status_code=500, detail="Failed to download filled form" ) @app.post("/evidence-pack") async def create_evidence_pack( taxpayer_id: str, tax_year: str, scope: str, evidence_items: list[dict[str, Any]], background_tasks: BackgroundTasks, current_user: dict[str, Any] = Depends(get_current_user), tenant_id: str = Depends(get_tenant_id), ) -> dict[str, Any]: """Create evidence pack with supporting documents""" with tracer.start_as_current_span("create_evidence_pack") as span: span.set_attribute("taxpayer_id", taxpayer_id) span.set_attribute("tax_year", tax_year) span.set_attribute("scope", scope) span.set_attribute("tenant_id", tenant_id) span.set_attribute("evidence_count", len(evidence_items)) try: # Generate pack ID pack_id = str(ulid.new()) span.set_attribute("pack_id", pack_id) # Start background pack creation background_tasks.add_task( _create_evidence_pack_async, taxpayer_id, tax_year, scope, evidence_items, tenant_id, pack_id, current_user.get("sub", "system"), ) logger.info( "Evidence pack creation started", pack_id=pack_id, taxpayer_id=taxpayer_id, scope=scope, ) return { "pack_id": pack_id, "taxpayer_id": taxpayer_id, "tax_year": tax_year, "scope": scope, "status": "creating", "evidence_count": len(evidence_items), } except Exception as e: logger.error("Failed to start evidence pack creation", error=str(e)) raise HTTPException( status_code=500, detail="Failed to start evidence pack creation" ) @app.get("/forms") async def list_supported_forms( current_user: dict[str, Any] = Depends(get_current_user), tenant_id: str = Depends(get_tenant_id), ) -> dict[str, Any]: """List supported forms with field information""" try: forms_info = [] for form_id in settings.supported_forms: form_config = UK_TAX_FORMS.get(form_id, {}) # Get form fields if template is loaded fields = [] if pdf_form_filler and form_id in pdf_form_filler.form_templates: fields = pdf_form_filler.get_form_fields(form_id) forms_info.append( { "form_id": form_id, "name": form_config.get("name", form_id), "template_available": form_id in (pdf_form_filler.form_templates if pdf_form_filler else {}), "field_count": len(fields), "fields": fields[:10], # Limit to first 10 fields for overview } ) return {"supported_forms": forms_info, "total_forms": len(forms_info)} except Exception as e: logger.error("Failed to list forms", error=str(e)) raise HTTPException(status_code=500, detail="Failed to list forms") async def _handle_calculation_ready(topic: str, payload: EventPayload) -> None: """Handle calculation completion events for auto-form filling""" try: data = payload.data calculation_id = data.get("calculation_id") schedule = data.get("schedule") tenant_id = data.get("tenant_id") if not calculation_id or not schedule or not tenant_id: logger.warning("Invalid calculation ready event", data=data) return logger.info( "Auto-filling form from calculation", calculation_id=calculation_id, schedule=schedule, ) # Get form boxes from event data form_boxes = data.get("form_boxes", {}) # Convert to field values field_values = {} for box_id, box_data in form_boxes.items(): field_values[f"box_{box_id}"] = box_data.get("value") await _fill_form_async( form_id=schedule, field_values=field_values, tenant_id=tenant_id, filling_id=str(ulid.new()), actor=payload.actor, calculation_id=calculation_id, ) except Exception as e: logger.error("Failed to handle calculation ready event", error=str(e)) async def _fill_form_async( form_id: str, field_values: dict[str, Any], tenant_id: str, filling_id: str, actor: str, calculation_id: str | None = None, ) -> None: """Fill form asynchronously""" with tracer.start_as_current_span("fill_form_async") as span: span.set_attribute("form_id", form_id) span.set_attribute("filling_id", filling_id) span.set_attribute("tenant_id", tenant_id) try: # Fill the form filled_pdf = pdf_form_filler.fill_form(form_id, field_values) # fmt: skip # pyright: ignore[reportOptionalMemberAccess] if not filled_pdf: # pylint: disable-next=broad-exception-raised raise Exception("Form filling failed") # Store filled form object_key = f"tenants/{tenant_id}/filled/{filling_id}.pdf" success = await storage_client.put_object( # fmt: skip # pyright: ignore[reportOptionalMemberAccess] bucket_name=settings.output_bucket, object_name=object_key, data=BytesIO(filled_pdf), length=len(filled_pdf), content_type="application/pdf", metadata={ "form_id": form_id, "filling_id": filling_id, "tenant_id": tenant_id, "calculation_id": calculation_id or "", "filled_at": datetime.utcnow().isoformat(), }, ) if not success: # pylint: disable-next=broad-exception-raised raise Exception("Failed to store filled form") # Update metrics metrics.counter("forms_filled_total").labels( tenant_id=tenant_id, form_id=form_id ).inc() # Publish completion event event_payload = EventPayload( data={ "filling_id": filling_id, "form_id": form_id, "tenant_id": tenant_id, "calculation_id": calculation_id, "s3_url": f"s3://{settings.output_bucket}/{object_key}", "field_count": len(field_values), }, actor=actor, tenant_id=tenant_id, ) await event_bus.publish(EventTopics.FORM_FILLED, event_payload) # fmt: skip # pyright: ignore[reportOptionalMemberAccess] logger.info( "Form filling completed", filling_id=filling_id, form_id=form_id ) except Exception as e: logger.error("Form filling failed", filling_id=filling_id, error=str(e)) # Update error metrics metrics.counter("form_filling_errors_total").labels( tenant_id=tenant_id, form_id=form_id, error_type=type(e).__name__ ).inc() async def _create_evidence_pack_async( taxpayer_id: str, tax_year: str, scope: str, evidence_items: list[dict[str, Any]], tenant_id: str, pack_id: str, actor: str, ) -> None: """Create evidence pack asynchronously""" with tracer.start_as_current_span("create_evidence_pack_async") as span: span.set_attribute("pack_id", pack_id) span.set_attribute("taxpayer_id", taxpayer_id) span.set_attribute("scope", scope) try: # Create evidence pack pack_result = await evidence_pack_generator.create_evidence_pack( # fmt: skip # pyright: ignore[reportOptionalMemberAccess] taxpayer_id=taxpayer_id, tax_year=tax_year, scope=scope, evidence_items=evidence_items, ) # Update metrics metrics.counter("evidence_packs_created_total").labels( tenant_id=tenant_id, scope=scope ).inc() logger.info( "Evidence pack created", pack_id=pack_id, pack_size=pack_result["pack_size"], evidence_count=pack_result["evidence_count"], ) except Exception as e: logger.error("Evidence pack creation failed", pack_id=pack_id, error=str(e)) @app.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse: """Handle HTTP exceptions with RFC7807 format""" return JSONResponse( status_code=exc.status_code, content=ErrorResponse( type=f"https://httpstatuses.com/{exc.status_code}", title=exc.detail, status=exc.status_code, detail=exc.detail, instance=str(request.url), trace_id="", ).model_dump(), ) if __name__ == "__main__": import uvicorn uvicorn.run("main:app", host="0.0.0.0", port=8009, reload=True, log_config=None)