"""Tax calculation engine with schedule computation and evidence trails.""" # mypy: disable-error-code=union-attr # FILE: apps/svc-reason/main.py # pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement # pylint: disable=global-variable-not-assigned,raise-missing-from,unused-argument # pylint: disable=broad-exception-caught,no-else-return,too-many-arguments,too-many-positional-arguments # pylint: disable=too-many-locals,import-outside-toplevel,too-many-statements import os # Import shared libraries import sys from datetime import datetime from decimal import Decimal from typing import Any import structlog import ulid from fastapi import BackgroundTasks, Depends, HTTPException, Request from fastapi.responses import JSONResponse sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..")) from libs.app_factory import create_app from libs.config import BaseAppSettings, create_event_bus, create_neo4j_client from libs.events import EventBus, EventPayload, EventTopics from libs.neo import Neo4jClient from libs.observability import get_metrics, get_tracer, setup_observability from libs.schemas import ErrorResponse, ScheduleComputeRequest, ScheduleComputeResponse from libs.security import get_current_user, get_tenant_id logger = structlog.get_logger() class ReasonSettings(BaseAppSettings): """Settings for reasoning service""" service_name: str = "svc-reason" # Tax year configuration current_tax_year: str = "2023-24" supported_tax_years: list[str] = ["2021-22", "2022-23", "2023-24", "2024-25"] # Calculation configuration precision: int = 2 # Decimal places rounding_method: str = "ROUND_HALF_UP" # Schedule support supported_schedules: list[str] = ["SA100", "SA103", "SA105", "SA106"] # Validation max_income: float = 10000000.0 # £10M max_expenses: float = 10000000.0 # £10M # Create app and settings app, settings = create_app( service_name="svc-reason", title="Tax Agent Reasoning Service", description="Tax calculation engine with schedule computation", settings_class=ReasonSettings, ) # Global clients neo4j_client: Neo4jClient | None = None event_bus: EventBus | None = None tracer = get_tracer("svc-reason") metrics = get_metrics() @app.on_event("startup") async def startup_event() -> None: """Initialize service dependencies""" global neo4j_client, event_bus logger.info("Starting reasoning service") # Setup observability setup_observability(settings) # Initialize Neo4j client neo4j_driver = create_neo4j_client(settings) neo4j_client = Neo4jClient(neo4j_driver) # Initialize event bus event_bus = create_event_bus(settings) await event_bus.start() # fmt: skip# pyright: ignore[reportOptionalMemberAccess] # Subscribe to KG upsert events await event_bus.subscribe(EventTopics.KG_UPSERTED, _handle_kg_upserted) # type: ignore logger.info("Reasoning service started successfully") @app.on_event("shutdown") async def shutdown_event() -> None: """Cleanup service dependencies""" global neo4j_client, event_bus logger.info("Shutting down reasoning service") if neo4j_client: await neo4j_client.close() if event_bus: await event_bus.stop() logger.info("Reasoning service shutdown complete") @app.get("/health") async def health_check() -> dict[str, Any]: """Health check endpoint""" return { "status": "healthy", "service": settings.service_name, "version": settings.service_version, "timestamp": datetime.utcnow().isoformat(), "supported_schedules": settings.supported_schedules, } @app.post("/compute", response_model=ScheduleComputeResponse) async def compute_schedule( request_data: ScheduleComputeRequest, background_tasks: BackgroundTasks, current_user: dict[str, Any] = Depends(get_current_user()), tenant_id: str = Depends(get_tenant_id()), ) -> ScheduleComputeResponse: """Compute tax schedule""" with tracer.start_as_current_span("compute_schedule") as span: span.set_attribute("tax_year", request_data.tax_year) span.set_attribute("taxpayer_id", request_data.taxpayer_id) span.set_attribute("schedule_id", request_data.schedule_id) span.set_attribute("tenant_id", tenant_id) try: # Validate inputs if request_data.tax_year not in settings.supported_tax_years: raise HTTPException( status_code=400, detail=f"Unsupported tax year: {request_data.tax_year}", ) if request_data.schedule_id not in settings.supported_schedules: raise HTTPException( status_code=400, detail=f"Unsupported schedule: {request_data.schedule_id}", ) # Generate calculation ID calculation_id = str(ulid.new()) span.set_attribute("calculation_id", calculation_id) # Start background computation background_tasks.add_task( _compute_schedule_async, request_data.tax_year, request_data.taxpayer_id, request_data.schedule_id, tenant_id, calculation_id, current_user.get("sub", "system"), ) logger.info( "Schedule computation started", calculation_id=calculation_id, schedule=request_data.schedule_id, ) return ScheduleComputeResponse( calculation_id=calculation_id, schedule=request_data.schedule_id, form_boxes={}, # Will be populated when computation completes evidence_trail=[], ) except HTTPException: raise except Exception as e: logger.error("Failed to start computation", error=str(e)) raise HTTPException(status_code=500, detail="Failed to start computation") @app.get("/calculations/{calculation_id}") async def get_calculation_results( calculation_id: str, current_user: dict[str, Any] = Depends(get_current_user()), tenant_id: str = Depends(get_tenant_id()), ) -> dict[str, Any]: """Get calculation results""" with tracer.start_as_current_span("get_calculation_results") as span: span.set_attribute("calculation_id", calculation_id) span.set_attribute("tenant_id", tenant_id) try: # Query calculation from Neo4j query = """ MATCH (c:Calculation {calculation_id: $calculation_id, tenant_id: $tenant_id}) WHERE c.retracted_at IS NULL RETURN c """ results = await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] query, {"calculation_id": calculation_id, "tenant_id": tenant_id} ) if not results: raise HTTPException(status_code=404, detail="Calculation not found") calculation = results[0]["c"] # Get form boxes form_boxes_query = """ MATCH (c:Calculation {calculation_id: $calculation_id})-[:HAS_BOX]->(b:FormBox) WHERE c.retracted_at IS NULL AND b.retracted_at IS NULL RETURN b """ box_results = await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] form_boxes_query, {"calculation_id": calculation_id} ) form_boxes = {} for box_result in box_results: box = box_result["b"] form_boxes[box["box"]] = { "value": box["value"], "description": box.get("description"), "confidence": box.get("confidence"), } return { "calculation_id": calculation_id, "schedule": calculation.get("schedule"), "tax_year": calculation.get("tax_year"), "status": calculation.get("status", "completed"), "form_boxes": form_boxes, "calculated_at": calculation.get("calculated_at"), } except HTTPException: raise except Exception as e: logger.error( "Failed to get calculation results", calculation_id=calculation_id, error=str(e), ) raise HTTPException( status_code=500, detail="Failed to get calculation results" ) async def _handle_kg_upserted(topic: str, payload: EventPayload) -> None: """Handle KG upsert events for auto-calculation""" try: data = payload.data entities = data.get("entities", []) tenant_id = data.get("tenant_id") # Check if we have enough data for calculation has_income = any(e.get("type") == "IncomeItem" for e in entities) has_expenses = any(e.get("type") == "ExpenseItem" for e in entities) if has_income or has_expenses: logger.info( "Auto-triggering calculation due to new financial data", tenant_id=tenant_id, ) # Find taxpayer ID from entities taxpayer_id = None for entity in entities: if entity.get("type") == "TaxpayerProfile": taxpayer_id = entity.get("id") break if taxpayer_id: await _compute_schedule_async( tax_year=settings.current_tax_year, taxpayer_id=taxpayer_id, schedule_id="SA103", # Default to self-employment tenant_id=tenant_id or "", calculation_id=str(ulid.new()), actor=payload.actor, ) except Exception as e: logger.error("Failed to handle KG upsert for auto-calculation", error=str(e)) async def _compute_schedule_async( tax_year: str, taxpayer_id: str, schedule_id: str, tenant_id: str, calculation_id: str, actor: str, ) -> None: """Compute schedule asynchronously""" with tracer.start_as_current_span("compute_schedule_async") as span: span.set_attribute("calculation_id", calculation_id) span.set_attribute("schedule_id", schedule_id) span.set_attribute("tax_year", tax_year) try: # Get relevant data from knowledge graph financial_data = await _get_financial_data(taxpayer_id, tax_year, tenant_id) # Perform calculations based on schedule if schedule_id == "SA103": form_boxes, evidence_trail = await _compute_sa103( financial_data, tax_year ) elif schedule_id == "SA105": form_boxes, evidence_trail = await _compute_sa105( financial_data, tax_year ) elif schedule_id == "SA100": form_boxes, evidence_trail = await _compute_sa100( financial_data, tax_year ) else: raise ValueError(f"Unsupported schedule: {schedule_id}") # Store calculation in knowledge graph await _store_calculation( calculation_id, schedule_id, tax_year, taxpayer_id, form_boxes, evidence_trail, tenant_id, ) # Update metrics metrics.counter("calculations_completed_total").labels( tenant_id=tenant_id, schedule=schedule_id, tax_year=tax_year ).inc() # Publish completion event event_payload = EventPayload( data={ "calculation_id": calculation_id, "schedule": schedule_id, "tax_year": tax_year, "taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "form_boxes": form_boxes, "box_count": len(form_boxes), }, actor=actor, tenant_id=tenant_id, ) await event_bus.publish(EventTopics.CALC_SCHEDULE_READY, event_payload) # type: ignore logger.info( "Schedule computation completed", calculation_id=calculation_id, schedule=schedule_id, boxes=len(form_boxes), ) except Exception as e: logger.error( "Schedule computation failed", calculation_id=calculation_id, error=str(e), ) # Update error metrics metrics.counter("calculation_errors_total").labels( tenant_id=tenant_id, schedule=schedule_id, error_type=type(e).__name__ ).inc() async def _get_financial_data( taxpayer_id: str, tax_year: str, tenant_id: str ) -> dict[str, Any]: """Get financial data from knowledge graph""" # Get income items income_query = """ MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_INCOME]->(i:IncomeItem) WHERE i.retracted_at IS NULL AND i.tax_year = $tax_year RETURN i """ income_results = ( await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] income_query, {"taxpayer_id": taxpayer_id, "tax_year": tax_year, "tenant_id": tenant_id}, ) ) # Get expense items expense_query = """ MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_EXPENSE]->(e:ExpenseItem) WHERE e.retracted_at IS NULL AND e.tax_year = $tax_year RETURN e """ expense_results = ( await neo4j_client.run_query( # pyright: ignore[reportOptionalMemberAccess] expense_query, {"taxpayer_id": taxpayer_id, "tax_year": tax_year, "tenant_id": tenant_id}, ) ) return { "income_items": [result["i"] for result in income_results], "expense_items": [result["e"] for result in expense_results], "tax_year": tax_year, "taxpayer_id": taxpayer_id, } async def _compute_sa103( financial_data: dict[str, Any], tax_year: str ) -> tuple[dict[str, Any], list[dict[str, Any]]]: """Compute SA103 (Self-employment) schedule""" income_items = financial_data.get("income_items", []) expense_items = financial_data.get("expense_items", []) # Calculate totals total_turnover = Decimal("0") total_expenses = Decimal("0") evidence_trail = [] # Sum income for income in income_items: if income.get("type") == "self_employment": amount = Decimal(str(income.get("gross", 0))) total_turnover += amount evidence_trail.append( { "box": "20", "source_entity": income.get("income_id"), "amount": float(amount), "description": f"Income: {income.get('description', 'Unknown')}", } ) # Sum expenses for expense in expense_items: if expense.get("allowable", True): amount = Decimal(str(expense.get("amount", 0))) total_expenses += amount evidence_trail.append( { "box": "31", "source_entity": expense.get("expense_id"), "amount": float(amount), "description": f"Expense: {expense.get('description', 'Unknown')}", } ) # Calculate net profit net_profit = total_turnover - total_expenses # Create form boxes form_boxes = { "20": { "value": float(total_turnover), "description": "Total turnover", "confidence": 0.9, }, "31": { "value": float(total_expenses), "description": "Total allowable business expenses", "confidence": 0.9, }, "32": { "value": float(net_profit), "description": "Net profit", "confidence": 0.9, }, } return form_boxes, evidence_trail async def _compute_sa105( financial_data: dict[str, Any], tax_year: str ) -> tuple[dict[str, Any], list[dict[str, Any]]]: """Compute SA105 (Property income) schedule""" income_items = financial_data.get("income_items", []) expense_items = financial_data.get("expense_items", []) # Calculate property income and expenses total_rents = Decimal("0") total_property_expenses = Decimal("0") evidence_trail = [] # Sum property income for income in income_items: if income.get("type") == "property": amount = Decimal(str(income.get("gross", 0))) total_rents += amount evidence_trail.append( { "box": "20", "source_entity": income.get("income_id"), "amount": float(amount), "description": f"Property income: {income.get('description', 'Unknown')}", } ) # Sum property expenses for expense in expense_items: if expense.get("type") == "property" and expense.get("allowable", True): amount = Decimal(str(expense.get("amount", 0))) total_property_expenses += amount # Map to appropriate SA105 box based on expense category box = _map_property_expense_to_box(expense.get("category", "other")) evidence_trail.append( { "box": box, "source_entity": expense.get("expense_id"), "amount": float(amount), "description": f"Property expense: {expense.get('description', 'Unknown')}", } ) # Calculate net property income net_property_income = total_rents - total_property_expenses form_boxes = { "20": { "value": float(total_rents), "description": "Total rents and other income", "confidence": 0.9, }, "38": { "value": float(total_property_expenses), "description": "Total property expenses", "confidence": 0.9, }, "net_income": { "value": float(net_property_income), "description": "Net property income", "confidence": 0.9, }, } return form_boxes, evidence_trail async def _compute_sa100( financial_data: dict[str, Any], tax_year: str ) -> tuple[dict[str, Any], list[dict[str, Any]]]: """Compute SA100 (Main return) schedule""" # This would aggregate from other schedules # For now, return basic structure form_boxes = { "1": {"value": "John Doe", "description": "Your name", "confidence": 0.9} } evidence_trail: list[dict[str, Any]] = [] return form_boxes, evidence_trail def _map_property_expense_to_box(category: str) -> str: """Map property expense category to SA105 box""" mapping = { "rent_rates_insurance": "31", "property_management": "32", "services_wages": "33", "repairs_maintenance": "34", "finance_costs": "35", "professional_fees": "36", "costs_of_services": "37", "other": "38", } return mapping.get(category, "38") async def _store_calculation( calculation_id: str, schedule: str, tax_year: str, taxpayer_id: str, form_boxes: dict[str, Any], evidence_trail: list[dict[str, Any]], tenant_id: str, ) -> None: """Store calculation results in knowledge graph""" # Create calculation node calc_properties = { "calculation_id": calculation_id, "schedule": schedule, "tax_year": tax_year, "taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "calculated_at": datetime.utcnow().isoformat(), "status": "completed", "source": "reasoning_engine", "extractor_version": "1.0.0", "valid_from": datetime.utcnow(), "asserted_at": datetime.utcnow(), } await neo4j_client.create_node("Calculation", calc_properties) # fmt: skip # pyright: ignore[reportOptionalMemberAccess] # Create form box nodes for box_id, box_data in form_boxes.items(): box_properties = { "form": schedule, "box": box_id, "value": box_data["value"], "description": box_data.get("description"), "confidence": box_data.get("confidence"), "calculation_id": calculation_id, "tenant_id": tenant_id, "source": "reasoning_engine", "extractor_version": "1.0.0", "valid_from": datetime.utcnow(), "asserted_at": datetime.utcnow(), } await neo4j_client.create_node("FormBox", box_properties) # fmt: skip # pyright: ignore[reportOptionalMemberAccess] # Create relationship await neo4j_client.create_relationship( # pyright: ignore[reportOptionalMemberAccess] "Calculation", calculation_id, "FormBox", f"{calculation_id}_{box_id}", "HAS_BOX", ) @app.exception_handler(HTTPException) async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse: """Handle HTTP exceptions with RFC7807 format""" return JSONResponse( status_code=exc.status_code, content=ErrorResponse( type=f"https://httpstatuses.com/{exc.status_code}", title=exc.detail, status=exc.status_code, detail=exc.detail, instance=str(request.url), trace_id=getattr(request.state, "trace_id", None), ).model_dump(), ) if __name__ == "__main__": import uvicorn uvicorn.run("main:app", host="0.0.0.0", port=8008, reload=True, log_config=None)