"""Tax calculation engine with schedule computation and evidence trails.""" # mypy: disable-error-code=union-attr # FILE: apps/svc-reason/main.py # pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement # pylint: disable=global-variable-not-assigned,raise-missing-from,unused-argument # pylint: disable=broad-exception-caught,no-else-return,too-many-arguments,too-many-positional-arguments # pylint: disable=too-many-locals,import-outside-toplevel,too-many-statements import os # Import shared libraries import sys from datetime import datetime from decimal import Decimal from typing import Any import httpx import structlog import ulid from fastapi import BackgroundTasks, Depends, HTTPException, Request from fastapi.responses import JSONResponse sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) from libs.app_factory import create_app from libs.config import BaseAppSettings, create_event_bus, create_neo4j_client from libs.events import EventBus, EventPayload, EventTopics from libs.neo import Neo4jClient from libs.observability import get_metrics, get_tracer, setup_observability from libs.schemas import ErrorResponse, ScheduleComputeRequest, ScheduleComputeResponse from libs.security import get_current_user, get_tenant_id logger = structlog.get_logger() class ReasonSettings(BaseAppSettings): """Settings for reasoning service""" service_name: str = "svc-reason" # Tax year configuration current_tax_year: str = "2023-24" supported_tax_years: list[str] = ["2021-22", "2022-23", "2023-24", "2024-25"] # Calculation configuration precision: int = 2 # Decimal places rounding_method: str = "ROUND_HALF_UP" # Schedule support supported_schedules: list[str] = ["SA100", "SA103", "SA105", "SA106"] # Validation max_income: float = 10000000.0 # £10M max_expenses: float = 10000000.0 # £10M # External services coverage_service_url: str = "http://svc-coverage:8000" # Create app and settings app, settings = create_app( service_name="svc-reason", title="Tax Agent Reasoning Service", description="Tax calculation engine with schedule computation", settings_class=ReasonSettings, ) # Global clients neo4j_client: Neo4jClient | None = None event_bus: EventBus | None = None http_client: httpx.AsyncClient | None = None tracer = get_tracer("svc-reason") metrics = get_metrics() @app.on_event("startup") async def startup_event() -> None: """Initialize service dependencies""" global neo4j_client, event_bus, http_client logger.info("Starting reasoning service") # Setup observability setup_observability(settings) # Initialize Neo4j client neo4j_driver = create_neo4j_client(settings) neo4j_client = Neo4jClient(neo4j_driver) # Initialize event bus event_bus = create_event_bus(settings) await event_bus.start() # fmt: skip# pyright: ignore[reportOptionalMemberAccess] # Initialize HTTP client http_client = httpx.AsyncClient() # Subscribe to KG upsert events await event_bus.subscribe(EventTopics.KG_UPSERTED, _handle_kg_upserted) # type: ignore logger.info("Reasoning service started successfully") @app.on_event("shutdown") async def shutdown_event() -> None: """Cleanup service dependencies""" global neo4j_client, event_bus, http_client logger.info("Shutting down reasoning service") if neo4j_client: await neo4j_client.close() if event_bus: await event_bus.stop() if http_client: await http_client.aclose() logger.info("Reasoning service shutdown complete") @app.get("/health") async def health_check() -> dict[str, Any]: """Health check endpoint""" return { "status": "healthy", "service": settings.service_name, "version": settings.service_version, "timestamp": datetime.utcnow().isoformat(), "supported_schedules": settings.supported_schedules, } @app.post("/compute", response_model=ScheduleComputeResponse) async def compute_schedule( request_data: ScheduleComputeRequest, background_tasks: BackgroundTasks, current_user: dict[str, Any] = Depends(get_current_user()), tenant_id: str = Depends(get_tenant_id()), ) -> ScheduleComputeResponse: """Compute tax schedule""" with tracer.start_as_current_span("compute_schedule") as span: span.set_attribute("tax_year", request_data.tax_year) span.set_attribute("taxpayer_id", request_data.taxpayer_id) span.set_attribute("schedule_id", request_data.schedule_id) span.set_attribute("tenant_id", tenant_id) try: # Validate inputs if request_data.tax_year not in settings.supported_tax_years: raise HTTPException( status_code=400, detail=f"Unsupported tax year: {request_data.tax_year}", ) if request_data.schedule_id not in settings.supported_schedules: raise HTTPException( status_code=400, detail=f"Unsupported schedule: {request_data.schedule_id}", ) # Generate calculation ID calculation_id = str(ulid.new()) span.set_attribute("calculation_id", calculation_id) # Start background computation background_tasks.add_task( _compute_schedule_async, request_data.tax_year, request_data.taxpayer_id, request_data.schedule_id, tenant_id, calculation_id, current_user.get("sub", "system"), ) logger.info( "Schedule computation started", calculation_id=calculation_id, schedule=request_data.schedule_id, ) return ScheduleComputeResponse( calculation_id=calculation_id, schedule=request_data.schedule_id, form_boxes={}, # Will be populated when computation completes evidence_trail=[], ) except HTTPException: raise except Exception as e: logger.error("Failed to start computation", error=str(e)) raise HTTPException(status_code=500, detail="Failed to start computation") @app.get("/calculations/{calculation_id}") async def get_calculation_results( calculation_id: str, current_user: dict[str, Any] = Depends(get_current_user()), tenant_id: str = Depends(get_tenant_id()), ) -> dict[str, Any]: """Get calculation results""" with tracer.start_as_current_span("get_calculation_results") as span: span.set_attribute("calculation_id", calculation_id) span.set_attribute("tenant_id", tenant_id) try: # Query calculation from Neo4j query = """ MATCH (c:Calculation {calculation_id: $calculation_id, tenant_id: $tenant_id}) WHERE c.retracted_at IS NULL RETURN c """ results = await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] query, {"calculation_id": calculation_id, "tenant_id": tenant_id} ) if not results: raise HTTPException(status_code=404, detail="Calculation not found") calculation = results[0]["c"] # Get form boxes form_boxes_query = """ MATCH (c:Calculation {calculation_id: $calculation_id})-[:HAS_BOX]->(b:FormBox) WHERE c.retracted_at IS NULL AND b.retracted_at IS NULL RETURN b """ box_results = await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] form_boxes_query, {"calculation_id": calculation_id} ) form_boxes = {} for box_result in box_results: box = box_result["b"] form_boxes[box["box"]] = { "value": box["value"], "description": box.get("description"), "confidence": box.get("confidence"), } return { "calculation_id": calculation_id, "schedule": calculation.get("schedule"), "tax_year": calculation.get("tax_year"), "status": calculation.get("status", "completed"), "form_boxes": form_boxes, "calculated_at": calculation.get("calculated_at"), } except HTTPException: raise except Exception as e: logger.error( "Failed to get calculation results", calculation_id=calculation_id, error=str(e), ) raise HTTPException( status_code=500, detail="Failed to get calculation results" ) async def _handle_kg_upserted(topic: str, payload: EventPayload) -> None: """Handle KG upsert events for auto-calculation and coverage check""" data = payload.data taxpayer_id = data.get("taxpayer_id") tax_year = data.get("tax_year") tenant_id = data.get("tenant_id") if not taxpayer_id or not tax_year or not tenant_id: logger.warning("Invalid KG upsert event data for coverage check", data=data) return # Trigger svc-coverage check try: if http_client: coverage_url = f"{settings.coverage_service_url}/v1/coverage/check" request_body = { "tax_year": tax_year, "taxpayer_id": taxpayer_id, } headers = { "X-Tenant-ID": tenant_id, # Assuming current_user is not directly available here, # or a system user token needs to be generated. # For now, omitting X-Authenticated-User for simplicity, # but in a real system, this should be handled securely. } response = await http_client.post(coverage_url, json=request_body, headers=headers) response.raise_for_status() coverage_report = response.json() logger.info( "Triggered svc-coverage check", taxpayer_id=taxpayer_id, tax_year=tax_year, coverage_status=coverage_report.get("overall_status"), ) # If coverage is complete, trigger calculation if coverage_report.get("overall_status") == "complete": logger.info( "Coverage complete, auto-triggering calculation", taxpayer_id=taxpayer_id, tax_year=tax_year, ) await _compute_schedule_async( tax_year=tax_year, taxpayer_id=taxpayer_id, schedule_id="SA103", # Default to self-employment tenant_id=tenant_id, calculation_id=str(ulid.new()), actor=payload.actor, ) else: logger.info( "Coverage incomplete, not triggering calculation", taxpayer_id=taxpayer_id, tax_year=tax_year, blocking_items=coverage_report.get("blocking_items"), ) except httpx.HTTPStatusError as e: logger.error( "Failed to trigger svc-coverage check due to HTTP error", taxpayer_id=taxpayer_id, tax_year=tax_year, error=str(e), response_status_code=e.response.status_code, response_text=e.response.text, ) except Exception as e: logger.error("Failed to handle KG upsert for auto-calculation or coverage check", error=str(e)) async def _compute_schedule_async( tax_year: str, taxpayer_id: str, schedule_id: str, tenant_id: str, calculation_id: str, actor: str, ) -> None: """Compute schedule asynchronously""" with tracer.start_as_current_span("compute_schedule_async") as span: span.set_attribute("calculation_id", calculation_id) span.set_attribute("schedule_id", schedule_id) span.set_attribute("tax_year", tax_year) try: # Get relevant data from knowledge graph financial_data = await _get_financial_data(taxpayer_id, tax_year, tenant_id) # Perform calculations based on schedule if schedule_id == "SA103": form_boxes, evidence_trail = await _compute_sa103( financial_data, tax_year ) elif schedule_id == "SA105": form_boxes, evidence_trail = await _compute_sa105( financial_data, tax_year ) elif schedule_id == "SA100": form_boxes, evidence_trail = await _compute_sa100( financial_data, tax_year ) else: raise ValueError(f"Unsupported schedule: {schedule_id}") # Store calculation in knowledge graph await _store_calculation( calculation_id, schedule_id, tax_year, taxpayer_id, form_boxes, evidence_trail, tenant_id, ) # Update metrics metrics.counter("calculations_completed_total").labels( tenant_id=tenant_id, schedule=schedule_id, tax_year=tax_year ).inc() # Publish completion event event_payload = EventPayload( data={ "calculation_id": calculation_id, "schedule": schedule_id, "tax_year": tax_year, "taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "form_boxes": form_boxes, "box_count": len(form_boxes), }, actor=actor, tenant_id=tenant_id, ) await event_bus.publish(EventTopics.CALC_SCHEDULE_READY, event_payload) # type: ignore logger.info( "Schedule computation completed", calculation_id=calculation_id, schedule=schedule_id, boxes=len(form_boxes), ) except Exception as e: logger.error( "Schedule computation failed", calculation_id=calculation_id, error=str(e), ) # Update error metrics metrics.counter("calculation_errors_total").labels( tenant_id=tenant_id, schedule=schedule_id, error_type=type(e).__name__ ).inc() async def _get_financial_data( taxpayer_id: str, tax_year: str, tenant_id: str ) -> dict[str, Any]: """Get financial data from knowledge graph""" # Get income items income_query = """ MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_INCOME]->(i:IncomeItem) WHERE i.retracted_at IS NULL AND i.tax_year = $tax_year RETURN i """ income_results = ( await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] income_query, {"taxpayer_id": taxpayer_id, "tax_year": tax_year, "tenant_id": tenant_id}, ) ) # Get expense items expense_query = """ MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_EXPENSE]->(e:ExpenseItem) WHERE e.retracted_at IS NULL AND e.tax_year = $tax_year RETURN e """ expense_results = ( await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] expense_query, {"taxpayer_id": taxpayer_id, "tax_year": tax_year, "tenant_id": tenant_id}, ) ) return { "income_items": [result["i"] for result in income_results], "expense_items": [result["e"] for result in expense_results], "tax_year": tax_year, "taxpayer_id": taxpayer_id, } async def _compute_sa103( financial_data: dict[str, Any], tax_year: str ) -> tuple[dict[str, Any], list[dict[str, Any]]]: """Compute SA103 (Self-employment) schedule""" income_items = financial_data.get("income_items", []) expense_items = financial_data.get("expense_items", []) # Calculate totals total_turnover = Decimal("0") total_expenses = Decimal("0") evidence_trail = [] # Sum income for income in income_items: if income.get("type") == "self_employment": amount = Decimal(str(income.get("gross", 0))) total_turnover += amount evidence_trail.append( { "box": "20", "source_entity": income.get("income_id"), "amount": float(amount), "description": f"Income: {income.get('description', 'Unknown')}", } ) # Sum expenses for expense in expense_items: if expense.get("allowable", True): amount = Decimal(str(expense.get("amount", 0))) total_expenses += amount evidence_trail.append( { "box": "31", "source_entity": expense.get("expense_id"), "amount": float(amount), "description": f"Expense: {expense.get('description', 'Unknown')}", } ) # Calculate net profit net_profit = total_turnover - total_expenses # Create form boxes form_boxes = { "20": { "value": float(total_turnover), "description": "Total turnover", "confidence": 0.9, }, "31": { "value": float(total_expenses), "description": "Total allowable business expenses", "confidence": 0.9, }, "32": { "value": float(net_profit), "description": "Net profit", "confidence": 0.9, }, } return form_boxes, evidence_trail async def _compute_sa105( financial_data: dict[str, Any], tax_year: str ) -> tuple[dict[str, Any], list[dict[str, Any]]]: """Compute SA105 (Property income) schedule""" income_items = financial_data.get("income_items", []) expense_items = financial_data.get("expense_items", []) # Calculate property income and expenses total_rents = Decimal("0") total_property_expenses = Decimal("0") evidence_trail = [] # Sum property income for income in income_items: if income.get("type") == "property": amount = Decimal(str(income.get("gross", 0))) total_rents += amount evidence_trail.append( { "box": "20", "source_entity": income.get("income_id"), "amount": float(amount), "description": f"Property income: {income.get('description', 'Unknown')}", } ) # Sum property expenses for expense in expense_items: if expense.get("type") == "property" and expense.get("allowable", True): amount = Decimal(str(expense.get("amount", 0))) total_property_expenses += amount # Map to appropriate SA105 box based on expense category box = _map_property_expense_to_box(expense.get("category", "other")) evidence_trail.append( { "box": box, "source_entity": expense.get("expense_id"), "amount": float(amount), "description": f"Property expense: {expense.get('description', 'Unknown')}", } ) # Calculate net property income net_property_income = total_rents - total_property_expenses form_boxes = { "20": { "value": float(total_rents), "description": "Total rents and other income", "confidence": 0.9, }, "38": { "value": float(total_property_expenses), "description": "Total property expenses", "confidence": 0.9, }, "net_income": { "value": float(net_property_income), "description": "Net property income", "confidence": 0.9, }, } return form_boxes, evidence_trail async def _compute_sa100( financial_data: dict[str, Any], tax_year: str ) -> tuple[dict[str, Any], list[dict[str, Any]]]: """Compute SA100 (Main return) schedule by aggregating other schedules""" form_boxes = {} evidence_trail: list[dict[str, Any]] = [] taxpayer_id = financial_data.get("taxpayer_id") tenant_id = financial_data.get("tenant_id") # Assuming tenant_id is passed in financial_data if not taxpayer_id or not tenant_id: raise ValueError("Taxpayer ID or Tenant ID missing for SA100 computation") # Get latest SA103 calculation sa103_query = """ MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_CALCULATION]->(c:Calculation) WHERE c.schedule = 'SA103' AND c.tax_year = $tax_year AND c.retracted_at IS NULL OPTIONAL MATCH (c)-[:HAS_BOX]->(b:FormBox) RETURN c.calculation_id AS calculation_id, c.calculated_at AS calculated_at, COLLECT({box: b.box, value: b.value, description: b.description, confidence: b.confidence}) AS form_boxes ORDER BY c.calculated_at DESC LIMIT 1 """ sa103_results = await neo4j_client.run_query( # type: ignore sa103_query, {"taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "tax_year": tax_year} ) sa103_calc = sa103_results[0] if sa103_results else None sa103_net_profit = Decimal("0") if sa103_calc and sa103_calc["form_boxes"]: for box in sa103_calc["form_boxes"]: if box["box"] == "32": # Net profit box in SA103 sa103_net_profit = Decimal(str(box["value"])) form_boxes["SA103_32"] = {"value": float(sa103_net_profit), "description": "SA103 Net Profit", "confidence": box.get("confidence", 0.9)} evidence_trail.append({ "box": "SA103_32", "source_calculation_id": sa103_calc["calculation_id"], "description": "Derived from SA103 Net Profit" }) break # Get latest SA105 calculation sa105_query = """ MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_CALCULATION]->(c:Calculation) WHERE c.schedule = 'SA105' AND c.tax_year = $tax_year AND c.retracted_at IS NULL OPTIONAL MATCH (c)-[:HAS_BOX]->(b:FormBox) RETURN c.calculation_id AS calculation_id, c.calculated_at AS calculated_at, COLLECT({box: b.box, value: b.value, description: b.description, confidence: b.confidence}) AS form_boxes ORDER BY c.calculated_at DESC LIMIT 1 """ sa105_results = await neo4j_client.run_query( # type: ignore sa105_query, {"taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "tax_year": tax_year} ) sa105_calc = sa105_results[0] if sa105_results else None sa105_net_income = Decimal("0") if sa105_calc and sa105_calc["form_boxes"]: for box in sa105_calc["form_boxes"]: if box["box"] == "net_income": # Net property income box in SA105 (custom box for internal calculation) sa105_net_income = Decimal(str(box["value"])) form_boxes["SA105_net_income"] = {"value": float(sa105_net_income), "description": "SA105 Net Property Income", "confidence": box.get("confidence", 0.9)} evidence_trail.append({ "box": "SA105_net_income", "source_calculation_id": sa105_calc["calculation_id"], "description": "Derived from SA105 Net Property Income" }) break # Aggregate total income for SA100 total_income = sa103_net_profit + sa105_net_income form_boxes["SA100_total_income"] = { "value": float(total_income), "description": "Total income from all sources", "confidence": 0.95 # Higher confidence for aggregated value } evidence_trail.append({ "box": "SA100_total_income", "derived_from": ["SA103_32", "SA105_net_income"], "description": "Aggregated from SA103 net profit and SA105 net property income" }) # Example: Basic personal allowance (simplified) personal_allowance = Decimal("12570") # For 2023-24 if total_income > Decimal("100000"): # Tapering not implemented here personal_allowance = Decimal("0") form_boxes["SA100_personal_allowance"] = { "value": float(personal_allowance), "description": "Personal Allowance", "confidence": 0.99 } evidence_trail.append({ "box": "SA100_personal_allowance", "source": "HMRC_guidance", "description": f"Standard personal allowance for {tax_year}" }) # Placeholder for actual SA100 boxes and complex calculations # This would involve detailed tax band calculations, reliefs, etc. # For now, we'll just show the aggregation. form_boxes["1"] = {"value": "John Doe (Aggregated)", "description": "Your name", "confidence": 0.9} return form_boxes, evidence_trail def _map_property_expense_to_box(category: str) -> str: """Map property expense category to SA105 box""" mapping = { "rent_rates_insurance": "31", "property_management": "32", "services_wages": "33", "repairs_maintenance": "34", "finance_costs": "35", "professional_fees": "36", "costs_of_services": "37", "other": "38", } return mapping.get(category, "38") async def _store_calculation( calculation_id: str, schedule: str, tax_year: str, taxpayer_id: str, form_boxes: dict[str, Any], evidence_trail: list[dict[str, Any]], tenant_id: str, ) -> None: """Store calculation results in knowledge graph""" # Create calculation node calc_properties = { "calculation_id": calculation_id, "schedule": schedule, "tax_year": tax_year, "taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "calculated_at": datetime.utcnow().isoformat(), "status": "completed", "source": "reasoning_engine", "extractor_version": "1.0.0", "valid_from": datetime.utcnow(), "asserted_at": datetime.utcnow(), } await neo4j_client.create_node("Calculation", calc_properties) # fmt: skip # pyright: ignore[reportOptionalMemberAccess] # Create form box nodes for box_id, box_data in form_boxes.items(): box_properties = { "form": schedule, "box": box_id, "value": box_data["value"], "description": box_data.get("description"), "confidence": box_data.get("confidence"), "calculation_id": calculation_id, "tenant_id": tenant_id, "source": "reasoning_engine", "extractor_version": "1.0.0", "valid_from": datetime.utcnow(), "asserted_at": datetime.utcnow(), } await neo4j_client.create_node("FormBox", box_properties) # fmt: skip # pyright: ignore[reportOptionalMemberAccess] # Create relationship await neo4j_client.create_relationship( # pyright: ignore[reportOptionalMemberAccess] "Calculation", calculation_id, "FormBox", f"{calculation_id}_{box_id}", "HAS_BOX", ) @app.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse: """Handle HTTP exceptions with RFC7807 format""" return JSONResponse( status_code=exc.status_code, content=ErrorResponse( type=f"https://httpstatuses.com/{exc.status_code}", title=exc.detail, status=exc.status_code, detail=exc.detail, instance=str(request.url), trace_id=getattr(request.state, "trace_id", None), ).model_dump(), ) if __name__ == "__main__": import uvicorn uvicorn.run("main:app", host="0.0.0.0", port=8008, reload=True, log_config=None)