Files
ai-tax-agent/apps/svc_reason/main.py
harkon b324ff09ef
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Initial commit
2025-10-11 08:41:36 +01:00

678 lines
22 KiB
Python

"""Tax calculation engine with schedule computation and evidence trails."""
# mypy: disable-error-code=union-attr
# FILE: apps/svc-reason/main.py
# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=global-variable-not-assigned,raise-missing-from,unused-argument
# pylint: disable=broad-exception-caught,no-else-return,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel,too-many-statements
import os
# Import shared libraries
import sys
from datetime import datetime
from decimal import Decimal
from typing import Any
import structlog
import ulid
from fastapi import BackgroundTasks, Depends, HTTPException, Request
from fastapi.responses import JSONResponse
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from libs.app_factory import create_app
from libs.config import BaseAppSettings, create_event_bus, create_neo4j_client
from libs.events import EventBus, EventPayload, EventTopics
from libs.neo import Neo4jClient
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.schemas import ErrorResponse, ScheduleComputeRequest, ScheduleComputeResponse
from libs.security import get_current_user, get_tenant_id
logger = structlog.get_logger()
class ReasonSettings(BaseAppSettings):
    """Configuration for the reasoning (tax calculation) service.

    Extends ``BaseAppSettings`` with the tax years and schedules the
    ``/compute`` endpoint accepts, numeric formatting preferences and
    ceilings for monetary inputs.
    """

    service_name: str = "svc-reason"

    # Tax years: the default year for auto-triggered calculations and the
    # full list accepted by the API.
    current_tax_year: str = "2023-24"
    supported_tax_years: list[str] = ["2021-22", "2022-23", "2023-24", "2024-25"]

    # Numeric formatting preferences.
    precision: int = 2  # Decimal places
    rounding_method: str = "ROUND_HALF_UP"

    # Schedules accepted by request validation.
    supported_schedules: list[str] = ["SA100", "SA103", "SA105", "SA106"]

    # Upper bounds for validation.
    max_income: float = 10_000_000.0  # £10M
    max_expenses: float = 10_000_000.0  # £10M
# Create the application object and its settings via the shared factory;
# `app` is used below for route, lifecycle and exception-handler decorators.
app, settings = create_app(
    service_name="svc-reason",
    title="Tax Agent Reasoning Service",
    description="Tax calculation engine with schedule computation",
    settings_class=ReasonSettings,
)
# Module-level clients: both start as None and are assigned in startup_event()
neo4j_client: Neo4jClient | None = None
event_bus: EventBus | None = None
# Tracer and metrics handles shared by the handlers and background jobs below
tracer = get_tracer("svc-reason")
metrics = get_metrics()
@app.on_event("startup")
async def startup_event() -> None:
    """Set up observability, the Neo4j client and the event bus at startup."""
    global neo4j_client, event_bus

    logger.info("Starting reasoning service")

    setup_observability(settings)

    # Wrap the driver produced by the config helper in the shared client.
    neo4j_client = Neo4jClient(create_neo4j_client(settings))

    # Bring the event bus up, then listen for knowledge-graph upserts so that
    # _handle_kg_upserted can auto-trigger calculations.
    event_bus = create_event_bus(settings)
    await event_bus.start()  # pyright: ignore[reportOptionalMemberAccess]
    await event_bus.subscribe(  # type: ignore
        EventTopics.KG_UPSERTED, _handle_kg_upserted
    )

    logger.info("Reasoning service started successfully")
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Release the Neo4j client and the event bus at shutdown.

    The event bus is stopped in a ``finally`` clause so that an error raised
    while closing the Neo4j client cannot leave the bus running. Such an
    error still propagates to the caller once the bus has been stopped.
    """
    logger.info("Shutting down reasoning service")
    try:
        if neo4j_client:
            await neo4j_client.close()
    finally:
        # Runs whether or not closing Neo4j succeeded.
        if event_bus:
            await event_bus.stop()
    logger.info("Reasoning service shutdown complete")
@app.get("/health")
async def health_check() -> dict[str, Any]:
    """Return a static "healthy" report with service identity and a timestamp."""
    report: dict[str, Any] = {"status": "healthy"}
    report["service"] = settings.service_name
    report["version"] = settings.service_version
    # Naive UTC timestamp in ISO-8601 form (no timezone suffix).
    report["timestamp"] = datetime.utcnow().isoformat()
    report["supported_schedules"] = settings.supported_schedules
    return report
@app.post("/compute", response_model=ScheduleComputeResponse)
async def compute_schedule(
    request_data: ScheduleComputeRequest,
    background_tasks: BackgroundTasks,
    current_user: dict[str, Any] = Depends(get_current_user()),
    tenant_id: str = Depends(get_tenant_id()),
) -> ScheduleComputeResponse:
    """Validate a compute request and queue the calculation as a background task.

    The response is returned immediately with a fresh ``calculation_id`` and
    empty ``form_boxes`` / ``evidence_trail``; the actual computation runs in
    ``_compute_schedule_async``.

    Raises:
        HTTPException: 400 for a tax year or schedule that is not listed in
            the settings; 500 if queuing the computation fails unexpectedly.
    """
    with tracer.start_as_current_span("compute_schedule") as span:
        tax_year = request_data.tax_year
        schedule_id = request_data.schedule_id

        for attr_name, attr_value in (
            ("tax_year", tax_year),
            ("taxpayer_id", request_data.taxpayer_id),
            ("schedule_id", schedule_id),
            ("tenant_id", tenant_id),
        ):
            span.set_attribute(attr_name, attr_value)

        try:
            # Reject anything outside the configured tax years / schedules.
            if tax_year not in settings.supported_tax_years:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported tax year: {request_data.tax_year}",
                )
            if schedule_id not in settings.supported_schedules:
                raise HTTPException(
                    status_code=400,
                    detail=f"Unsupported schedule: {request_data.schedule_id}",
                )

            calculation_id = str(ulid.new())
            span.set_attribute("calculation_id", calculation_id)

            # The acting user falls back to "system" when the token has no "sub".
            actor = current_user.get("sub", "system")
            background_tasks.add_task(
                _compute_schedule_async,
                tax_year,
                request_data.taxpayer_id,
                schedule_id,
                tenant_id,
                calculation_id,
                actor,
            )

            logger.info(
                "Schedule computation started",
                calculation_id=calculation_id,
                schedule=schedule_id,
            )

            # Boxes and evidence are filled in once the background job finishes.
            return ScheduleComputeResponse(
                calculation_id=calculation_id,
                schedule=schedule_id,
                form_boxes={},
                evidence_trail=[],
            )
        except HTTPException:
            # Validation errors pass through untouched.
            raise
        except Exception as e:
            logger.error("Failed to start computation", error=str(e))
            raise HTTPException(status_code=500, detail="Failed to start computation")
@app.get("/calculations/{calculation_id}")
async def get_calculation_results(
    calculation_id: str,
    current_user: dict[str, Any] = Depends(get_current_user()),
    tenant_id: str = Depends(get_tenant_id()),
) -> dict[str, Any]:
    """Return a stored calculation and its form boxes for the caller's tenant.

    Both graph queries match the ``Calculation`` node on ``calculation_id``
    *and* ``tenant_id``, so form boxes are never read through a calculation
    that belongs to another tenant.

    Returns:
        A dict with ``calculation_id``, ``schedule``, ``tax_year``, ``status``
        (defaulting to "completed"), ``form_boxes`` keyed by box id, and
        ``calculated_at``.

    Raises:
        HTTPException: 404 when no non-retracted calculation matches; 500 on
            any other failure.
    """
    with tracer.start_as_current_span("get_calculation_results") as span:
        span.set_attribute("calculation_id", calculation_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Cypher text is kept flush-left so the query sent to Neo4j carries
            # no extra leading whitespace.
            query = """
MATCH (c:Calculation {calculation_id: $calculation_id, tenant_id: $tenant_id})
WHERE c.retracted_at IS NULL
RETURN c
"""
            results = await neo4j_client.run_query(  # pyright: ignore[reportOptionalMemberAccess]
                query, {"calculation_id": calculation_id, "tenant_id": tenant_id}
            )
            if not results:
                raise HTTPException(status_code=404, detail="Calculation not found")
            calculation = results[0]["c"]

            # Same tenant scoping as the lookup above.
            form_boxes_query = """
MATCH (c:Calculation {calculation_id: $calculation_id, tenant_id: $tenant_id})-[:HAS_BOX]->(b:FormBox)
WHERE c.retracted_at IS NULL AND b.retracted_at IS NULL
RETURN b
"""
            box_results = await neo4j_client.run_query(  # pyright: ignore[reportOptionalMemberAccess]
                form_boxes_query,
                {"calculation_id": calculation_id, "tenant_id": tenant_id},
            )

            boxes = [row["b"] for row in box_results]
            form_boxes = {
                box["box"]: {
                    "value": box["value"],
                    "description": box.get("description"),
                    "confidence": box.get("confidence"),
                }
                for box in boxes
            }

            return {
                "calculation_id": calculation_id,
                "schedule": calculation.get("schedule"),
                "tax_year": calculation.get("tax_year"),
                "status": calculation.get("status", "completed"),
                "form_boxes": form_boxes,
                "calculated_at": calculation.get("calculated_at"),
            }
        except HTTPException:
            raise
        except Exception as e:
            logger.error(
                "Failed to get calculation results",
                calculation_id=calculation_id,
                error=str(e),
            )
            raise HTTPException(
                status_code=500, detail="Failed to get calculation results"
            )
async def _handle_kg_upserted(topic: str, payload: EventPayload) -> None:
    """Auto-trigger an SA103 calculation when a KG upsert carries financial data.

    A calculation is started only when the event contains at least one
    ``IncomeItem`` or ``ExpenseItem`` entity, a ``TaxpayerProfile`` entity with
    an id, and a tenant. The tenant is read from the event data first and then
    from the event envelope; without one the event is skipped with a warning
    instead of computing under an empty tenant id.

    All exceptions are logged and swallowed so a bad event cannot break the
    subscriber.
    """
    try:
        data = payload.data
        entities = data.get("entities") or []
        # NOTE(review): the envelope fallback assumes EventPayload exposes the
        # tenant_id it is constructed with - confirm against libs.events.
        tenant_id = data.get("tenant_id") or getattr(payload, "tenant_id", None)

        # Only income or expense entities make a recalculation worthwhile.
        has_financial_data = any(
            entity.get("type") in ("IncomeItem", "ExpenseItem") for entity in entities
        )
        if not has_financial_data:
            return

        logger.info(
            "Auto-triggering calculation due to new financial data",
            tenant_id=tenant_id,
        )

        # The first TaxpayerProfile entity in the event identifies the taxpayer.
        taxpayer_id = next(
            (
                entity.get("id")
                for entity in entities
                if entity.get("type") == "TaxpayerProfile"
            ),
            None,
        )
        if not taxpayer_id:
            return

        if not tenant_id:
            logger.warning(
                "Skipping auto-calculation: event has no tenant_id",
                taxpayer_id=taxpayer_id,
            )
            return

        await _compute_schedule_async(
            tax_year=settings.current_tax_year,
            taxpayer_id=taxpayer_id,
            schedule_id="SA103",  # Default to self-employment
            tenant_id=tenant_id,
            calculation_id=str(ulid.new()),
            actor=payload.actor,
        )
    except Exception as e:
        logger.error("Failed to handle KG upsert for auto-calculation", error=str(e))
async def _compute_schedule_async(
    tax_year: str,
    taxpayer_id: str,
    schedule_id: str,
    tenant_id: str,
    calculation_id: str,
    actor: str,
) -> None:
    """Compute one schedule, store the result and publish a completion event.

    Steps: resolve the calculator for ``schedule_id``, load the taxpayer's
    financial data, compute the form boxes, store them in the knowledge
    graph, bump the completion counter and publish ``CALC_SCHEDULE_READY``.

    The calculator is resolved *before* any graph query, so an unsupported
    schedule fails fast without touching Neo4j. Only SA100, SA103 and SA105
    have calculators; ``ReasonSettings.supported_schedules`` also lists SA106,
    which therefore ends up in the error path below.

    Exceptions never propagate: any failure is logged and counted in
    ``calculation_errors_total``.
    """
    with tracer.start_as_current_span("compute_schedule_async") as span:
        span.set_attribute("calculation_id", calculation_id)
        span.set_attribute("schedule_id", schedule_id)
        span.set_attribute("tax_year", tax_year)

        try:
            # Schedule id -> calculator coroutine.
            calculators = {
                "SA103": _compute_sa103,
                "SA105": _compute_sa105,
                "SA100": _compute_sa100,
            }
            calculator = calculators.get(schedule_id)
            if calculator is None:
                raise ValueError(f"Unsupported schedule: {schedule_id}")

            # Get relevant data from knowledge graph
            financial_data = await _get_financial_data(taxpayer_id, tax_year, tenant_id)

            form_boxes, evidence_trail = await calculator(financial_data, tax_year)

            # Store calculation in knowledge graph
            await _store_calculation(
                calculation_id,
                schedule_id,
                tax_year,
                taxpayer_id,
                form_boxes,
                evidence_trail,
                tenant_id,
            )

            # Update metrics
            metrics.counter("calculations_completed_total").labels(
                tenant_id=tenant_id, schedule=schedule_id, tax_year=tax_year
            ).inc()

            # Publish completion event
            event_payload = EventPayload(
                data={
                    "calculation_id": calculation_id,
                    "schedule": schedule_id,
                    "tax_year": tax_year,
                    "taxpayer_id": taxpayer_id,
                    "tenant_id": tenant_id,
                    "form_boxes": form_boxes,
                    "box_count": len(form_boxes),
                },
                actor=actor,
                tenant_id=tenant_id,
            )
            await event_bus.publish(EventTopics.CALC_SCHEDULE_READY, event_payload)  # type: ignore

            logger.info(
                "Schedule computation completed",
                calculation_id=calculation_id,
                schedule=schedule_id,
                boxes=len(form_boxes),
            )
        except Exception as e:
            logger.error(
                "Schedule computation failed",
                calculation_id=calculation_id,
                error=str(e),
            )
            # Update error metrics
            metrics.counter("calculation_errors_total").labels(
                tenant_id=tenant_id, schedule=schedule_id, error_type=type(e).__name__
            ).inc()
async def _get_financial_data(
    taxpayer_id: str, tax_year: str, tenant_id: str
) -> dict[str, Any]:
    """Load a taxpayer's non-retracted income and expense items for one tax year.

    Runs two Cypher queries, one after the other, against the module-level
    Neo4j client. Both are scoped to the taxpayer profile and tenant.

    Returns:
        A dict with ``income_items``, ``expense_items``, ``tax_year`` and
        ``taxpayer_id``.
    """
    # Cypher text is kept flush-left so the query strings are unchanged.
    income_cypher = """
MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_INCOME]->(i:IncomeItem)
WHERE i.retracted_at IS NULL
AND i.tax_year = $tax_year
RETURN i
"""
    expense_cypher = """
MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_EXPENSE]->(e:ExpenseItem)
WHERE e.retracted_at IS NULL
AND e.tax_year = $tax_year
RETURN e
"""

    # Income first, then expenses; each call gets its own parameter dict.
    income_rows = await neo4j_client.run_query(  # pyright: ignore[reportOptionalMemberAccess]
        income_cypher,
        {"taxpayer_id": taxpayer_id, "tax_year": tax_year, "tenant_id": tenant_id},
    )
    expense_rows = await neo4j_client.run_query(  # pyright: ignore[reportOptionalMemberAccess]
        expense_cypher,
        {"taxpayer_id": taxpayer_id, "tax_year": tax_year, "tenant_id": tenant_id},
    )

    income_items = [row["i"] for row in income_rows]
    expense_items = [row["e"] for row in expense_rows]
    return {
        "income_items": income_items,
        "expense_items": expense_items,
        "tax_year": tax_year,
        "taxpayer_id": taxpayer_id,
    }
async def _compute_sa103(
    financial_data: dict[str, Any], tax_year: str
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """Compute SA103 (Self-employment) schedule.

    Sums the ``gross`` of every income item whose ``type`` is
    "self_employment" into box 20 and the ``amount`` of every allowable
    expense item (``allowable`` defaults to True) into box 31; box 32 is
    their difference. Arithmetic is done in ``Decimal`` and converted to
    ``float`` only when the boxes are built.

    A null ``gross``/``amount`` or a null item list is treated exactly like a
    missing one (zero / empty), so one incomplete graph record cannot abort
    the whole computation.

    Args:
        financial_data: Dict with optional ``income_items`` and
            ``expense_items`` lists of dict-like items.
        tax_year: Unused here; kept for a uniform calculator signature.

    Returns:
        ``(form_boxes, evidence_trail)`` where the trail has one entry per
        item that contributed to box 20 or box 31.
    """
    income_items = financial_data.get("income_items") or []
    expense_items = financial_data.get("expense_items") or []

    total_turnover = Decimal("0")
    total_expenses = Decimal("0")
    evidence_trail: list[dict[str, Any]] = []

    # Sum income
    for income in income_items:
        if income.get("type") != "self_employment":
            continue
        # `or 0` maps an explicit null to zero, same as a missing key.
        amount = Decimal(str(income.get("gross") or 0))
        total_turnover += amount
        evidence_trail.append(
            {
                "box": "20",
                "source_entity": income.get("income_id"),
                "amount": float(amount),
                "description": f"Income: {income.get('description', 'Unknown')}",
            }
        )

    # Sum expenses
    for expense in expense_items:
        if not expense.get("allowable", True):
            continue
        amount = Decimal(str(expense.get("amount") or 0))
        total_expenses += amount
        evidence_trail.append(
            {
                "box": "31",
                "source_entity": expense.get("expense_id"),
                "amount": float(amount),
                "description": f"Expense: {expense.get('description', 'Unknown')}",
            }
        )

    # Calculate net profit
    net_profit = total_turnover - total_expenses

    form_boxes = {
        "20": {
            "value": float(total_turnover),
            "description": "Total turnover",
            "confidence": 0.9,
        },
        "31": {
            "value": float(total_expenses),
            "description": "Total allowable business expenses",
            "confidence": 0.9,
        },
        "32": {
            "value": float(net_profit),
            "description": "Net profit",
            "confidence": 0.9,
        },
    }
    return form_boxes, evidence_trail
async def _compute_sa105(
    financial_data: dict[str, Any], tax_year: str
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """Compute SA105 (Property income) schedule.

    Sums the ``gross`` of every income item whose ``type`` is "property" into
    box 20 and the ``amount`` of every allowable expense item of type
    "property" into box 38; ``net_income`` is their difference. Each expense's
    evidence entry is tagged with the box chosen by
    ``_map_property_expense_to_box`` for its category.

    A null ``gross``/``amount`` or a null item list is treated exactly like a
    missing one (zero / empty), so one incomplete graph record cannot abort
    the whole computation.

    Args:
        financial_data: Dict with optional ``income_items`` and
            ``expense_items`` lists of dict-like items.
        tax_year: Unused here; kept for a uniform calculator signature.

    Returns:
        ``(form_boxes, evidence_trail)``.
    """
    income_items = financial_data.get("income_items") or []
    expense_items = financial_data.get("expense_items") or []

    total_rents = Decimal("0")
    total_property_expenses = Decimal("0")
    evidence_trail: list[dict[str, Any]] = []

    # Sum property income
    for income in income_items:
        if income.get("type") != "property":
            continue
        # `or 0` maps an explicit null to zero, same as a missing key.
        amount = Decimal(str(income.get("gross") or 0))
        total_rents += amount
        evidence_trail.append(
            {
                "box": "20",
                "source_entity": income.get("income_id"),
                "amount": float(amount),
                "description": f"Property income: {income.get('description', 'Unknown')}",
            }
        )

    # Sum property expenses
    for expense in expense_items:
        if expense.get("type") != "property" or not expense.get("allowable", True):
            continue
        amount = Decimal(str(expense.get("amount") or 0))
        total_property_expenses += amount
        # Map to appropriate SA105 box based on expense category
        box = _map_property_expense_to_box(expense.get("category", "other"))
        evidence_trail.append(
            {
                "box": box,
                "source_entity": expense.get("expense_id"),
                "amount": float(amount),
                "description": f"Property expense: {expense.get('description', 'Unknown')}",
            }
        )

    # Calculate net property income
    net_property_income = total_rents - total_property_expenses

    form_boxes = {
        "20": {
            "value": float(total_rents),
            "description": "Total rents and other income",
            "confidence": 0.9,
        },
        "38": {
            "value": float(total_property_expenses),
            "description": "Total property expenses",
            "confidence": 0.9,
        },
        "net_income": {
            "value": float(net_property_income),
            "description": "Net property income",
            "confidence": 0.9,
        },
    }
    return form_boxes, evidence_trail
async def _compute_sa100(
    financial_data: dict[str, Any], tax_year: str
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
    """Compute SA100 (Main return) schedule.

    Placeholder: returns a single fixed box "1" and an empty evidence trail.
    Neither argument is read yet.
    """
    # NOTE(review): the name below is a hard-coded stand-in, not taxpayer data;
    # a real implementation would aggregate from the other schedules.
    trail: list[dict[str, Any]] = []
    name_box = {"value": "John Doe", "description": "Your name", "confidence": 0.9}
    return {"1": name_box}, trail
def _map_property_expense_to_box(category: str) -> str:
    """Return the SA105 box id for a property expense category.

    Categories that are not in the table fall back to box "38".
    """
    box_by_category = {
        "rent_rates_insurance": "31",
        "property_management": "32",
        "services_wages": "33",
        "repairs_maintenance": "34",
        "finance_costs": "35",
        "professional_fees": "36",
        "costs_of_services": "37",
        "other": "38",
    }
    try:
        return box_by_category[category]
    except KeyError:
        # Unknown category: same box as "other".
        return "38"
async def _store_calculation(
    calculation_id: str,
    schedule: str,
    tax_year: str,
    taxpayer_id: str,
    form_boxes: dict[str, Any],
    evidence_trail: list[dict[str, Any]],
    tenant_id: str,
) -> None:
    """Store calculation results in knowledge graph.

    Creates one ``Calculation`` node, then for every form box a ``FormBox``
    node and a ``HAS_BOX`` relationship from the calculation to it.

    The timestamp is taken once, so the calculation and all of its boxes
    share identical ``calculated_at`` / ``valid_from`` / ``asserted_at``
    values instead of drifting by the time spent between writes.

    ``evidence_trail`` is accepted for interface compatibility but is not
    written to the graph here.
    """
    # Single naive-UTC timestamp shared by every node written below.
    now = datetime.utcnow()
    provenance = {
        "source": "reasoning_engine",
        "extractor_version": "1.0.0",
        "valid_from": now,
        "asserted_at": now,
    }

    # Create calculation node
    calc_properties = {
        "calculation_id": calculation_id,
        "schedule": schedule,
        "tax_year": tax_year,
        "taxpayer_id": taxpayer_id,
        "tenant_id": tenant_id,
        "calculated_at": now.isoformat(),
        "status": "completed",
        **provenance,
    }
    await neo4j_client.create_node("Calculation", calc_properties)  # pyright: ignore[reportOptionalMemberAccess]

    # Create form box nodes
    for box_id, box_data in form_boxes.items():
        box_properties = {
            "form": schedule,
            "box": box_id,
            "value": box_data["value"],
            "description": box_data.get("description"),
            "confidence": box_data.get("confidence"),
            "calculation_id": calculation_id,
            "tenant_id": tenant_id,
            **provenance,
        }
        await neo4j_client.create_node("FormBox", box_properties)  # pyright: ignore[reportOptionalMemberAccess]

        # NOTE(review): the target id "<calculation_id>_<box_id>" is not among
        # the FormBox properties written above - confirm how
        # Neo4jClient.create_relationship resolves node ids.
        await neo4j_client.create_relationship(  # pyright: ignore[reportOptionalMemberAccess]
            "Calculation",
            calculation_id,
            "FormBox",
            f"{calculation_id}_{box_id}",
            "HAS_BOX",
        )
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Render an ``HTTPException`` as an RFC7807-style JSON error body.

    The exception's ``detail`` is used for both ``title`` and ``detail``; the
    request URL becomes ``instance`` and ``trace_id`` is taken from
    ``request.state`` when present.
    """
    problem = ErrorResponse(
        type=f"https://httpstatuses.com/{exc.status_code}",
        title=exc.detail,
        status=exc.status_code,
        detail=exc.detail,
        instance=str(request.url),
        trace_id=getattr(request.state, "trace_id", None),
    )
    body = problem.model_dump()
    return JSONResponse(status_code=exc.status_code, content=body)
if __name__ == "__main__":
    import uvicorn

    # Direct-run entry point: serve "main:app" on all interfaces, port 8008,
    # with auto-reload on and uvicorn's own logging configuration disabled.
    uvicorn.run(
        "main:app",
        host="0.0.0.0",
        port=8008,
        reload=True,
        log_config=None,
    )