completed local setup with compose
Some checks failed
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

harkon
2025-11-26 13:17:17 +00:00
parent 8fe5e62fee
commit fdba81809f
87 changed files with 5610 additions and 3376 deletions

View File

@@ -13,9 +13,10 @@ ENV PATH="/opt/venv/bin:$PATH"
# Copy requirements and install dependencies
COPY libs/requirements-base.txt /tmp/libs-requirements.txt
COPY libs/requirements-ml.txt /tmp/libs-ml-requirements.txt
COPY apps/svc_extract/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/requirements.txt
pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/libs-ml-requirements.txt -r /tmp/requirements.txt
# Production stage
FROM python:3.12-slim

View File

@@ -43,7 +43,7 @@ RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD curl -f http://localhost:8000/healthz || exit 1
# Expose port

View File

@@ -44,7 +44,7 @@ RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD curl -f http://localhost:8000/healthz || exit 1
# Expose port

View File

@@ -158,13 +158,13 @@ async def upload_document(
event_payload = EventPayload(
data={
"doc_id": doc_id,
"tenant_id": tenant_id,
"filename": file.filename or "unknown",
"kind": kind.value,
"source": source,
"checksum": checksum,
"file_size": len(content),
"content_type": content_type,
"s3_url": storage_result["s3_url"],
"checksum_sha256": checksum,
"size_bytes": len(content),
"mime_type": content_type,
"storage_path": storage_result["s3_url"],
},
actor=current_user.get("sub", "system"),
tenant_id=tenant_id,

View File

@@ -1,54 +1,27 @@
# Multi-stage build for svc_kg
FROM python:3.12-slim AS builder
FROM python:3.12-slim-bookworm
# Install build dependencies
RUN apt-get update && apt-get install -y \
build-essential \
curl \
&& rm -rf /var/lib/apt/lists/*
# Set environment variables
ENV PYTHONUNBUFFERED 1
ENV APP_HOME /app
# Create virtual environment
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Create and set working directory
WORKDIR $APP_HOME
# Copy requirements and install dependencies
# Install dependencies
COPY libs/requirements-base.txt /tmp/libs-requirements.txt
COPY libs/requirements-rdf.txt /tmp/libs-rdf.txt
COPY apps/svc_kg/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/libs-rdf.txt -r /tmp/requirements.txt
# Production stage
FROM python:3.12-slim
# Install runtime dependencies
RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd -r appuser \
&& useradd -r -g appuser appuser
# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Set working directory
WORKDIR /app
RUN pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/requirements.txt
# Copy application code
COPY libs/ ./libs/
COPY apps/svc_kg/ ./apps/svc_kg/
# Create non-root user and set permissions
RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/healthz || exit 1
# Expose port
EXPOSE 8000
# Run the application
CMD ["python", "-m", "uvicorn", "apps.svc_kg.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -1,28 +1,22 @@
# FILE: apps/svc-kg/main.py
# Knowledge graph facade with CRUD, queries, lineage, and SHACL validation
import json
import os
# Import shared libraries
import sys
from datetime import datetime
from typing import Any
from typing import Any, cast
import structlog
from fastapi import Depends, HTTPException, Query, Request
from fastapi import HTTPException, Request
from fastapi.responses import JSONResponse
from pyshacl import validate
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDF
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from libs.app_factory import create_app
from libs.config import BaseAppSettings, create_event_bus, create_neo4j_client
from libs.events import EventBus
from libs.neo import Neo4jClient, SHACLValidator, TemporalQueries
from libs.events import EventBus, EventPayload, EventTopics
from libs.neo import Neo4jClient
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.schemas import ErrorResponse
from libs.security import get_current_user, get_tenant_id
logger = structlog.get_logger()
@@ -31,523 +25,193 @@ class KGSettings(BaseAppSettings):
"""Settings for KG service"""
service_name: str = "svc-kg"
shacl_shapes_path: str = "schemas/shapes.ttl"
# SHACL validation
shapes_file: str = "schemas/shapes.ttl"
validate_on_write: bool = True
# Query limits
max_results: int = 1000
max_depth: int = 10
query_timeout: int = 30
# Create app and settings
app, settings = create_app(
service_name="svc-kg",
title="Tax Agent Knowledge Graph Service",
description="Knowledge graph facade with CRUD and queries",
settings_class=KGSettings,
)
# Global clients
neo4j_client: Neo4jClient | None = None
shacl_validator: SHACLValidator | None = None
event_bus: EventBus | None = None
tracer = get_tracer("svc-kg")
metrics = get_metrics()
shapes_graph: Graph | None = None
settings: KGSettings
@app.on_event("startup")
async def startup_event() -> None:
async def init_dependencies(app_settings: KGSettings) -> None:
"""Initialize service dependencies"""
global neo4j_client, shacl_validator, event_bus
global neo4j_client, event_bus, settings, shapes_graph
settings = app_settings
logger.info("Starting KG service")
# Setup observability
setup_observability(settings)
# Initialize Neo4j client
neo4j_driver = create_neo4j_client(settings)
neo4j_client = Neo4jClient(neo4j_driver)
# Initialize SHACL validator
if os.path.exists(settings.shapes_file):
shacl_validator = SHACLValidator(settings.shapes_file)
# Initialize event bus
event_bus = create_event_bus(settings)
if not event_bus:
raise HTTPException(status_code=500, detail="Event bus not initialized")
await event_bus.start()
logger.info("KG service started successfully")
await event_bus.subscribe(EventTopics.KG_UPSERT_READY, _handle_kg_upsert_ready)
# Load SHACL shapes
try:
shapes_graph = Graph().parse(settings.shacl_shapes_path, format="turtle")
logger.info("SHACL shapes loaded successfully")
except Exception as e:
logger.error("Failed to load SHACL shapes", error=str(e))
shapes_graph = None
app, _settings = create_app(
service_name="svc-kg",
title="Tax Agent Knowledge Graph Service",
description="Service for managing and validating the Knowledge Graph",
settings_class=KGSettings,
)
# Initialize dependencies immediately
@app.on_event("startup")
async def startup_event():
await init_dependencies(cast(KGSettings, _settings))
tracer = get_tracer("svc-kg")
metrics = get_metrics()
@app.on_event("shutdown")
async def shutdown_event() -> None:
"""Cleanup service dependencies"""
global neo4j_client, event_bus
global event_bus, neo4j_client
logger.info("Shutting down KG service")
if neo4j_client:
await neo4j_client.close()
if event_bus:
await event_bus.stop()
if neo4j_client:
await neo4j_client.close()
logger.info("KG service shutdown complete")
@app.get("/health")
async def health_check() -> dict[str, Any]:
"""Health check endpoint"""
return {
"status": "healthy",
"service": settings.service_name,
"version": settings.service_version,
"timestamp": datetime.utcnow().isoformat(),
}
async def _handle_kg_upsert_ready(topic: str, payload: EventPayload) -> None:
"""Handle KG upsert ready events"""
data = payload.data
nodes = data.get("nodes", [])
relationships = data.get("relationships", [])
document_id = data.get("document_id")
tenant_id = data.get("tenant_id")
if not nodes and not relationships:
logger.warning("No nodes or relationships to upsert", data=data)
return
@app.post("/nodes/{label}")
async def create_node(
label: str,
properties: dict[str, Any],
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Create a new node"""
with tracer.start_as_current_span("create_node") as span:
span.set_attribute("label", label)
with tracer.start_as_current_span("upsert_kg_data") as span:
span.set_attribute("document_id", document_id)
span.set_attribute("tenant_id", tenant_id)
span.set_attribute("node_count", len(nodes))
span.set_attribute("relationship_count", len(relationships))
try:
# Add tenant isolation
properties["tenant_id"] = tenant_id
properties["created_by"] = current_user.get("sub", "system")
# Validate with SHACL if enabled
if settings.validate_on_write and shacl_validator:
await _validate_node(label, properties)
# Create node
result = await neo4j_client.create_node(label, properties)
# Update metrics
metrics.counter("nodes_created_total").labels(
tenant_id=tenant_id, label=label
).inc()
logger.info("Node created", label=label, node_id=result.get("id"))
return {
"status": "created",
"label": label,
"properties": properties,
"neo4j_result": result,
}
except Exception as e:
logger.error("Failed to create node", label=label, error=str(e))
raise HTTPException(
status_code=500, detail=f"Failed to create node: {str(e)}"
# 1. Validate data against SHACL schema
conforms, validation_report = await _validate_with_shacl(
nodes, relationships
)
if not conforms:
logger.error(
"SHACL validation failed",
document_id=document_id,
validation_report=validation_report,
)
metrics.counter("kg_validation_errors_total").labels(
tenant_id=tenant_id
).inc()
return
# 2. Write data to Neo4j
for node in nodes:
await neo4j_client.create_node(node["type"], node["properties"]) # type: ignore
@app.get("/nodes/{label}")
async def get_nodes(
label: str,
limit: int = Query(default=100, le=settings.max_results),
filters: str | None = Query(default=None),
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Get nodes by label with optional filters"""
for rel in relationships:
await neo4j_client.create_relationship( # type: ignore
rel["sourceId"],
rel["targetId"],
rel["type"],
rel["properties"],
)
with tracer.start_as_current_span("get_nodes") as span:
span.set_attribute("label", label)
span.set_attribute("tenant_id", tenant_id)
span.set_attribute("limit", limit)
try:
# Parse filters
filter_dict: dict[str, Any] = {}
if filters:
try:
filter_dict = json.loads(filters)
except json.JSONDecodeError:
raise HTTPException(status_code=400, detail="Invalid filters JSON")
# Add tenant isolation
filter_dict["tenant_id"] = tenant_id
# Build query
query = TemporalQueries.get_current_state_query(label, filter_dict)
query += f" LIMIT {limit}"
# Execute query
results = await neo4j_client.run_query(query)
# Update metrics
metrics.counter("nodes_queried_total").labels(
tenant_id=tenant_id, label=label
).inc()
return {
"label": label,
"count": len(results),
"nodes": [result["n"] for result in results],
}
except HTTPException:
raise
except Exception as e:
logger.error("Failed to get nodes", label=label, error=str(e))
raise HTTPException(
status_code=500, detail=f"Failed to get nodes: {str(e)}"
# 3. Publish kg.upserted event
event_payload = EventPayload(
data={
"document_id": document_id,
"tenant_id": tenant_id,
"taxpayer_id": data.get("taxpayer_id"),
"tax_year": data.get("tax_year"),
"node_count": len(nodes),
"relationship_count": len(relationships),
},
actor=payload.actor,
tenant_id=tenant_id,
trace_id=str(span.get_span_context().trace_id),
)
await event_bus.publish(EventTopics.KG_UPSERTED, event_payload) # type: ignore
@app.get("/nodes/{label}/{node_id}")
async def get_node(
label: str,
node_id: str,
include_lineage: bool = Query(default=False),
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Get specific node with optional lineage"""
with tracer.start_as_current_span("get_node") as span:
span.set_attribute("label", label)
span.set_attribute("node_id", node_id)
span.set_attribute("tenant_id", tenant_id)
try:
# Get node
query = f"""
MATCH (n:{label} {{id: $node_id, tenant_id: $tenant_id}})
WHERE n.retracted_at IS NULL
RETURN n
"""
results = await neo4j_client.run_query(
query, {"node_id": node_id, "tenant_id": tenant_id}
)
if not results:
raise HTTPException(status_code=404, detail="Node not found")
node_data = results[0]["n"]
# Get lineage if requested
lineage: list[dict[str, Any]] = []
if include_lineage:
lineage = await neo4j_client.get_node_lineage(node_id)
return {"node": node_data, "lineage": lineage if include_lineage else None}
except HTTPException:
raise
except Exception as e:
logger.error(
"Failed to get node", label=label, node_id=node_id, error=str(e)
)
raise HTTPException(status_code=500, detail=f"Failed to get node: {str(e)}")
@app.put("/nodes/{label}/{node_id}")
async def update_node(
label: str,
node_id: str,
properties: dict[str, Any],
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Update node with bitemporal versioning"""
with tracer.start_as_current_span("update_node") as span:
span.set_attribute("label", label)
span.set_attribute("node_id", node_id)
span.set_attribute("tenant_id", tenant_id)
try:
# Add metadata
properties["tenant_id"] = tenant_id
properties["updated_by"] = current_user.get("sub", "system")
# Validate with SHACL if enabled
if settings.validate_on_write and shacl_validator:
await _validate_node(label, properties)
# Update node (creates new version)
await neo4j_client.update_node(label, node_id, properties)
# Update metrics
metrics.counter("nodes_updated_total").labels(
tenant_id=tenant_id, label=label
).inc()
logger.info("Node updated", label=label, node_id=node_id)
return {
"status": "updated",
"label": label,
"node_id": node_id,
"properties": properties,
}
except Exception as e:
logger.error(
"Failed to update node", label=label, node_id=node_id, error=str(e)
)
raise HTTPException(
status_code=500, detail=f"Failed to update node: {str(e)}"
)
@app.post("/relationships")
async def create_relationship(
from_label: str,
from_id: str,
to_label: str,
to_id: str,
relationship_type: str,
properties: dict[str, Any] | None = None,
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Create relationship between nodes"""
with tracer.start_as_current_span("create_relationship") as span:
span.set_attribute("from_label", from_label)
span.set_attribute("to_label", to_label)
span.set_attribute("relationship_type", relationship_type)
span.set_attribute("tenant_id", tenant_id)
try:
# Add metadata
rel_properties = properties or {}
rel_properties["tenant_id"] = tenant_id
rel_properties["created_by"] = current_user.get("sub", "system")
# Create relationship
await neo4j_client.create_relationship(
from_label, from_id, to_label, to_id, relationship_type, rel_properties
)
# Update metrics
metrics.counter("relationships_created_total").labels(
tenant_id=tenant_id, relationship_type=relationship_type
).inc()
metrics.counter("kg_upserts_total").labels(tenant_id=tenant_id).inc()
logger.info(
"Relationship created",
from_id=from_id,
to_id=to_id,
type=relationship_type,
"KG upsert completed", document_id=document_id, tenant_id=tenant_id
)
return {
"status": "created",
"from_id": from_id,
"to_id": to_id,
"relationship_type": relationship_type,
"properties": rel_properties,
}
except Exception as e:
logger.error("Failed to create relationship", error=str(e))
raise HTTPException(
status_code=500, detail=f"Failed to create relationship: {str(e)}"
logger.error(
"Failed to upsert KG data", document_id=document_id, error=str(e)
)
@app.post("/query")
async def execute_query(
query: str,
parameters: dict[str, Any] | None = None,
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Execute custom Cypher query with tenant isolation"""
with tracer.start_as_current_span("execute_query") as span:
span.set_attribute("tenant_id", tenant_id)
try:
# Add tenant isolation to parameters
query_params = parameters or {}
query_params["tenant_id"] = tenant_id
# Validate query (basic security check)
if not _is_safe_query(query):
raise HTTPException(status_code=400, detail="Unsafe query detected")
# Execute query with timeout
results = await neo4j_client.run_query(query, query_params, max_retries=1)
# Update metrics
metrics.counter("custom_queries_total").labels(tenant_id=tenant_id).inc()
return {
"query": query,
"parameters": query_params,
"results": results,
"count": len(results),
}
except Exception as e:
logger.error("Query execution failed", query=query[:100], error=str(e))
raise HTTPException(status_code=500, detail=f"Query failed: {str(e)}")
@app.get("/export/rdf")
async def export_rdf(
format: str = Query(default="turtle"),
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Export knowledge graph as RDF"""
with tracer.start_as_current_span("export_rdf") as span:
span.set_attribute("format", format)
span.set_attribute("tenant_id", tenant_id)
try:
# Export tenant-specific data
rdf_data = await neo4j_client.export_to_rdf(format)
# Update metrics
metrics.counter("rdf_exports_total").labels(
tenant_id=tenant_id, format=format
metrics.counter("kg_upsert_errors_total").labels(
tenant_id=tenant_id, error_type=type(e).__name__
).inc()
return {
"format": format,
"rdf_data": rdf_data,
"exported_at": datetime.utcnow().isoformat(),
}
except Exception as e:
logger.error("RDF export failed", format=format, error=str(e))
raise HTTPException(
status_code=500, detail=f"RDF export failed: {str(e)}"
) from e
async def _validate_with_shacl(
nodes: list[dict[str, Any]], relationships: list[dict[str, Any]]
) -> tuple[bool, str]:
"""Validate data against SHACL shapes."""
if not shapes_graph:
logger.warning("SHACL shapes not loaded, skipping validation.")
return True, "SHACL shapes not loaded"
data_graph = Graph()
namespace = "http://ai-tax-agent.com/ontology/"
@app.post("/validate")
async def validate_graph(
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Validate knowledge graph with SHACL"""
for node in nodes:
node_uri = URIRef(f"{namespace}{node['id']}")
data_graph.add((node_uri, RDF.type, URIRef(f"{namespace}{node['type']}")))
for key, value in node["properties"].items():
if value is not None:
data_graph.add((node_uri, URIRef(f"{namespace}{key}"), Literal(value)))
with tracer.start_as_current_span("validate_graph") as span:
span.set_attribute("tenant_id", tenant_id)
try:
if not shacl_validator:
raise HTTPException(
status_code=501, detail="SHACL validation not configured"
)
# Export current graph state
rdf_export = await neo4j_client.export_to_rdf("turtle")
# Extract RDF data from export result
rdf_data = rdf_export.get("rdf_data", "")
if not rdf_data:
raise HTTPException(
status_code=500, detail="Failed to export RDF data for validation"
)
# Run SHACL validation
validation_result = await shacl_validator.validate_graph(rdf_data)
# Update metrics
metrics.counter("validations_total").labels(
tenant_id=tenant_id, conforms=validation_result["conforms"]
).inc()
return {
"conforms": validation_result["conforms"],
"violations_count": validation_result["violations_count"],
"results_text": validation_result["results_text"],
"validated_at": datetime.utcnow().isoformat(),
}
except Exception as e:
logger.error("Graph validation failed", error=str(e))
raise HTTPException(status_code=500, detail=f"Validation failed: {str(e)}")
async def _validate_node(label: str, properties: dict[str, Any]) -> bool:
"""Validate node with SHACL"""
if not shacl_validator:
return True
for rel in relationships:
source_uri = URIRef(f"{namespace}{rel['sourceId']}")
target_uri = URIRef(f"{namespace}{rel['targetId']}")
rel_uri = URIRef(f"{namespace}{rel['type']}")
data_graph.add((source_uri, rel_uri, target_uri))
try:
# Create a minimal RDF representation of the node for validation
rdf_lines = ["@prefix tax: <https://tax-kg.example.com/> ."]
node_uri = "tax:temp_node"
# Add type declaration
rdf_lines.append(f"{node_uri} a tax:{label} .")
# Add properties
for prop, value in properties.items():
if isinstance(value, str):
rdf_lines.append(f'{node_uri} tax:{prop} "{value}" .')
else:
rdf_lines.append(f"{node_uri} tax:{prop} {value} .")
rdf_data = "\n".join(rdf_lines)
# Validate the node RDF data
validation_result = await shacl_validator.validate_graph(rdf_data)
if not validation_result["conforms"]:
logger.warning(
"Node SHACL validation failed",
label=label,
violations=validation_result["violations_count"],
details=validation_result["results_text"],
)
return False
logger.debug("Node SHACL validation passed", label=label)
return True
conforms, results_graph, results_text = validate(
data_graph,
shacl_graph=shapes_graph,
ont_graph=None, # No ontology graph
inference="rdfs",
abort_on_first=False,
allow_infos=False,
meta_shacl=False,
advanced=False,
js=False,
debug=False,
)
return conforms, results_text
except Exception as e:
logger.error("Node SHACL validation error", label=label, error=str(e))
# Return True to not block operations on validation errors
return True
def _is_safe_query(query: str) -> bool:
"""Basic query safety check"""
query_lower = query.lower()
# Block dangerous operations
dangerous_keywords = [
"delete",
"remove",
"drop",
"create index",
"create constraint",
"load csv",
"call",
"foreach",
]
for keyword in dangerous_keywords:
if keyword in query_lower:
return False
return True
logger.error("Error during SHACL validation", error=str(e))
return False, str(e)
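For orientation, a minimal standalone sketch of the pyshacl call used above, validating a single illustrative node against the shapes file (the namespace and shapes path are taken from the code above; the node id and properties are invented):

from pyshacl import validate
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDF

NS = "http://ai-tax-agent.com/ontology/"  # namespace used by _validate_with_shacl
shapes_graph = Graph().parse("schemas/shapes.ttl", format="turtle")  # default shapes path from KGSettings

data_graph = Graph()
node = URIRef(f"{NS}document_demo")  # illustrative node id
data_graph.add((node, RDF.type, URIRef(f"{NS}Document")))
data_graph.add((node, URIRef(f"{NS}doc_id"), Literal("demo")))

# pyshacl returns a (conforms, results_graph, results_text) tuple
conforms, results_graph, results_text = validate(
    data_graph, shacl_graph=shapes_graph, inference="rdfs", abort_on_first=False
)
print(conforms, results_text)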
@app.exception_handler(HTTPException)
@@ -561,7 +225,7 @@ async def http_exception_handler(request: Request, exc: HTTPException) -> JSONRe
status=exc.status_code,
detail=exc.detail,
instance=str(request.url),
trace_id="",
trace_id=getattr(request.state, "trace_id", None),
).model_dump(),
)

View File

@@ -1,22 +1,2 @@
# Service-specific dependencies
# RDF and semantic web
rdflib>=7.2.1
pyshacl>=0.30.1
# Graph algorithms
networkx>=3.5
# Data export formats
xmltodict>=1.0.2
# Query optimization
pyparsing>=3.2.5
# Graph visualization (optional)
graphviz>=0.21
# Additional Neo4j utilities
neomodel>=5.5.3
# Cypher query building
py2neo>=2021.2.4
setuptools
pyshacl==0.23.0

View File

@@ -1,53 +1,27 @@
# Multi-stage build for svc_normalize_map
FROM python:3.12-slim AS builder
FROM python:3.12-slim-bookworm
# Install build dependencies
RUN apt-get update && apt-get install -y \
build-essential \
curl \
&& rm -rf /var/lib/apt/lists/*
# Set environment variables
ENV PYTHONUNBUFFERED 1
ENV APP_HOME /app
# Create virtual environment
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Create and set working directory
WORKDIR $APP_HOME
# Copy requirements and install dependencies
# Install dependencies
COPY libs/requirements-base.txt /tmp/libs-requirements.txt
COPY apps/svc_normalize_map/requirements.txt /tmp/requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/requirements.txt
# Production stage
FROM python:3.12-slim
# Install runtime dependencies
RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd -r appuser \
&& useradd -r -g appuser appuser
# Copy virtual environment from builder
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Set working directory
WORKDIR /app
RUN pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/requirements.txt
# Copy application code
COPY libs/ ./libs/
COPY apps/svc_normalize_map/ ./apps/svc_normalize_map/
# Create non-root user and set permissions
RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8000/healthz || exit 1
# Expose port
EXPOSE 8000
# Run the application
CMD ["python", "-m", "uvicorn", "apps.svc_normalize_map.main:app", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -1,24 +1,11 @@
"""Data normalization and knowledge graph mapping."""
# FILE: apps/svc-normalize-map/main.py
# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=global-variable-not-assigned,raise-missing-from,unused-argument
# pylint: disable=broad-exception-caught,no-else-return,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel,too-many-statements
# mypy: disable-error-code=union-attr
import os
# Import shared libraries
import sys
from datetime import datetime
from decimal import Decimal
from typing import Any
from datetime import UTC, datetime
from typing import Any, cast
import structlog
import ulid
from fastapi import BackgroundTasks, Depends, HTTPException, Request
from fastapi import HTTPException, Request
from fastapi.responses import JSONResponse
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
@@ -34,83 +21,68 @@ from libs.events import EventBus, EventPayload, EventTopics
from libs.neo import Neo4jClient
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.schemas import ErrorResponse
from libs.security import get_current_user, get_tenant_id
from libs.storage import DocumentStorage, StorageClient
logger = structlog.get_logger()
class NormalizeMapSettings(BaseAppSettings):
"""Settings for normalize-map service"""
"""Settings for NormalizeMap service"""
service_name: str = "svc-normalize-map"
# Normalization configuration
currency_default: str = "GBP"
date_formats: list[str] = [
"%Y-%m-%d",
"%d/%m/%Y",
"%d-%m-%Y",
"%d %B %Y",
"%d %b %Y",
"%B %d, %Y",
]
# Mapping configuration
confidence_threshold: float = 0.7
auto_create_entities: bool = True
# Validation rules
max_amount: float = 1000000.0 # £1M
min_confidence: float = 0.5
# Create app and settings
app, settings = create_app(
service_name="svc-normalize-map",
title="Tax Agent Normalize-Map Service",
description="Data normalization and knowledge graph mapping service",
settings_class=NormalizeMapSettings,
)
# Global clients
storage_client: StorageClient | None = None
document_storage: DocumentStorage | None = None
neo4j_client: Neo4jClient | None = None
event_bus: EventBus | None = None
tracer = get_tracer("svc-normalize-map")
metrics = get_metrics()
neo4j_client: Neo4jClient | None = None
settings: NormalizeMapSettings
@app.on_event("startup")
async def startup_event() -> None:
async def init_dependencies(app_settings: NormalizeMapSettings) -> None:
"""Initialize service dependencies"""
global storage_client, document_storage, neo4j_client, event_bus
global storage_client, document_storage, event_bus, neo4j_client, settings
logger.info("Starting normalize-map service")
settings = app_settings
logger.info("Starting NormalizeMap service")
# Setup observability
setup_observability(settings)
# Initialize MinIO client
minio_client = create_minio_client(settings)
storage_client = StorageClient(minio_client)
document_storage = DocumentStorage(storage_client)
# Initialize Neo4j client
neo4j_driver = create_neo4j_client(settings)
neo4j_client = Neo4jClient(neo4j_driver)
# Initialize event bus
event_bus = create_event_bus(settings)
if not event_bus:
raise HTTPException(status_code=500, detail="Event bus not initialized")
await event_bus.start()
# Subscribe to extraction completion events
await event_bus.subscribe( # type: ignore
EventTopics.DOC_EXTRACTED, _handle_extraction_completed
)
await event_bus.subscribe(EventTopics.DOC_EXTRACTED, _handle_document_extracted)
logger.info("Normalize-map service started successfully")
logger.info("NormalizeMap service started successfully")
app, _settings = create_app(
service_name="svc-normalize-map",
title="Tax Agent Normalize and Map Service",
description="Normalize extracted data and map to Knowledge Graph",
settings_class=NormalizeMapSettings,
)
# Initialize dependencies immediately
@app.on_event("startup")
async def startup_event(): # type: ignore
await init_dependencies(cast(NormalizeMapSettings, _settings))
tracer = get_tracer("svc-normalize-map")
metrics = get_metrics()
@app.on_event("shutdown")
@@ -118,456 +90,235 @@ async def shutdown_event() -> None:
"""Cleanup service dependencies"""
global event_bus, neo4j_client
logger.info("Shutting down normalize-map service")
if neo4j_client:
await neo4j_client.close()
logger.info("Shutting down NormalizeMap service")
if event_bus:
await event_bus.stop()
logger.info("Normalize-map service shutdown complete")
if neo4j_client:
await neo4j_client.close()
logger.info("NormalizeMap service shutdown complete")
@app.get("/health")
async def health_check() -> dict[str, Any]:
"""Health check endpoint"""
return {
"status": "healthy",
"service": settings.service_name,
"version": settings.service_version,
"timestamp": datetime.utcnow().isoformat(),
}
async def _handle_document_extracted(topic: str, payload: EventPayload) -> None:
"""Handle document extracted events"""
data = payload.data
doc_id = data.get("doc_id")
tenant_id = data.get("tenant_id")
extracted_fields = data.get("extraction_results", {}).get("extracted_fields", {})
provenance = data.get("extraction_results", {}).get("provenance", [])
if not doc_id or not tenant_id or not extracted_fields:
logger.warning("Invalid document extracted event", data=data)
return
@app.post("/normalize/{doc_id}")
async def normalize_document(
doc_id: str,
background_tasks: BackgroundTasks,
current_user: dict[str, Any] = Depends(get_current_user),
tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
"""Normalize and map document data to knowledge graph"""
with tracer.start_as_current_span("normalize_document") as span:
with tracer.start_as_current_span("normalize_and_map") as span:
span.set_attribute("doc_id", doc_id)
span.set_attribute("tenant_id", tenant_id)
try:
# Check if extraction results exist
extraction_results = await document_storage.get_extraction_result(
tenant_id, doc_id
)
if not extraction_results:
raise HTTPException(
status_code=404, detail="Extraction results not found"
)
# 1. Normalize data
normalized_data = await _normalize_data(extracted_fields)
# Generate normalization ID
normalization_id = str(ulid.new())
span.set_attribute("normalization_id", normalization_id)
# Start background normalization
background_tasks.add_task(
_normalize_and_map_async,
doc_id,
tenant_id,
extraction_results,
normalization_id,
current_user.get("sub", "system"),
# 2. Map to KG ontology
kg_upsert_payload = await _map_to_kg_ontology(
doc_id, tenant_id, normalized_data, provenance
)
logger.info(
"Normalization started",
doc_id=doc_id,
normalization_id=normalization_id,
# 3. Publish kg.upsert.ready event
event_payload = EventPayload(
data=kg_upsert_payload,
actor=payload.actor,
tenant_id=tenant_id,
trace_id=str(span.get_span_context().trace_id),
)
await event_bus.publish(EventTopics.KG_UPSERT_READY, event_payload) # type: ignore
return {
"normalization_id": normalization_id,
"doc_id": doc_id,
"status": "processing",
}
except HTTPException:
raise
except Exception as e:
logger.error("Failed to start normalization", doc_id=doc_id, error=str(e))
raise HTTPException(status_code=500, detail="Failed to start normalization")
async def _handle_extraction_completed(topic: str, payload: EventPayload) -> None:
"""Handle extraction completion events"""
try:
data = payload.data
doc_id = data.get("doc_id")
tenant_id = data.get("tenant_id")
confidence = data.get("confidence", 0.0)
if not doc_id or not tenant_id:
logger.warning("Invalid extraction completion event", data=data)
return
# Only auto-process if confidence is above threshold
if confidence >= settings.confidence_threshold:
logger.info(
"Auto-normalizing extracted document",
doc_id=doc_id,
confidence=confidence,
)
extraction_results = data.get("extraction_results")
if not extraction_results:
extraction_results = await document_storage.get_extraction_result(
tenant_id, doc_id
)
if extraction_results:
await _normalize_and_map_async(
doc_id=doc_id,
tenant_id=tenant_id,
extraction_results=extraction_results,
normalization_id=str(ulid.new()),
actor=payload.actor,
)
else:
logger.info(
"Skipping auto-normalization due to low confidence",
doc_id=doc_id,
confidence=confidence,
)
except Exception as e:
logger.error("Failed to handle extraction completion", error=str(e))
async def _normalize_and_map_async(
doc_id: str,
tenant_id: str,
extraction_results: dict[str, Any],
normalization_id: str,
actor: str,
) -> None:
"""Normalize and map data asynchronously"""
with tracer.start_as_current_span("normalize_and_map_async") as span:
span.set_attribute("doc_id", doc_id)
span.set_attribute("normalization_id", normalization_id)
try:
extracted_fields = extraction_results.get("extracted_fields", {})
provenance = extraction_results.get("provenance", [])
# Normalize extracted data
normalized_data = await _normalize_data(extracted_fields, provenance)
# Map to knowledge graph entities
entities = await _map_to_entities(normalized_data, doc_id, tenant_id)
# Store entities in knowledge graph
stored_entities = await _store_entities(entities, tenant_id)
# Create normalization results
normalization_results = {
"doc_id": doc_id,
"normalization_id": normalization_id,
"normalized_at": datetime.utcnow().isoformat(),
"normalized_data": normalized_data,
"entities": stored_entities,
"entity_count": len(stored_entities),
}
logger.info("Normalization completed", results=normalization_results)
# Update metrics
metrics.counter("documents_normalized_total").labels(
metrics.counter("normalized_documents_total").labels(
tenant_id=tenant_id
).inc()
metrics.histogram("entities_created").labels(tenant_id=tenant_id).observe(
len(stored_entities)
)
# Publish completion event
event_payload = EventPayload(
data={
"doc_id": doc_id,
"tenant_id": tenant_id,
"normalization_id": normalization_id,
"entity_count": len(stored_entities),
"entities": stored_entities,
},
actor=actor,
tenant_id=tenant_id,
)
await event_bus.publish(EventTopics.KG_UPSERTED, event_payload)
logger.info(
"Normalization completed", doc_id=doc_id, entities=len(stored_entities)
"Document normalized and mapped", doc_id=doc_id, tenant_id=tenant_id
)
except Exception as e:
logger.error("Normalization failed", doc_id=doc_id, error=str(e))
# Update error metrics
logger.error(
"Failed to normalize and map document", doc_id=doc_id, error=str(e)
)
metrics.counter("normalization_errors_total").labels(
tenant_id=tenant_id, error_type=type(e).__name__
).inc()
async def _normalize_data(
extracted_fields: dict[str, Any], provenance: list[dict[str, Any]]
) -> dict[str, Any]:
"""Normalize extracted data"""
normalized = {}
for field_name, raw_value in extracted_fields.items():
try:
if "amount" in field_name.lower() or "total" in field_name.lower():
normalized[field_name] = _normalize_amount(raw_value)
elif "date" in field_name.lower():
normalized[field_name] = _normalize_date(raw_value)
elif "name" in field_name.lower():
normalized[field_name] = _normalize_name(raw_value)
elif "address" in field_name.lower():
normalized[field_name] = _normalize_address(raw_value)
elif "number" in field_name.lower():
normalized[field_name] = _normalize_number(raw_value)
else:
normalized[field_name] = _normalize_text(raw_value)
except Exception as e:
logger.warning(
"Failed to normalize field",
field=field_name,
value=raw_value,
error=str(e),
)
normalized[field_name] = raw_value # Keep original value
return normalized
def _normalize_amount(value: str) -> dict[str, Any]:
"""Normalize monetary amount"""
import re
if not value:
return {"amount": None, "currency": settings.currency_default}
# Remove currency symbols and formatting
clean_value = re.sub(r"[£$€,\s]", "", str(value))
try:
amount = Decimal(clean_value)
# Validate amount
if amount > settings.max_amount:
logger.warning("Amount exceeds maximum", amount=amount)
return {
"amount": float(amount),
"currency": settings.currency_default,
"original": value,
}
except Exception:
return {
"amount": None,
"currency": settings.currency_default,
"original": value,
}
def _normalize_date(value: str) -> dict[str, Any]:
"""Normalize date"""
from dateutil import parser
if not value:
return {"date": None, "original": value}
try:
# Try parsing with dateutil first
parsed_date = parser.parse(str(value), dayfirst=True)
return {"date": parsed_date.date().isoformat(), "original": value}
except Exception:
# Try manual formats
for fmt in settings.date_formats:
async def _normalize_data(extracted_fields: dict[str, Any]) -> dict[str, Any]:
"""Normalize extracted data into a consistent format"""
normalized_data = {}
for key, value in extracted_fields.items():
# Example: Simple date normalization (can be expanded)
if "date" in key.lower() and isinstance(value, str):
try:
parsed_date = datetime.strptime(str(value), fmt)
return {"date": parsed_date.date().isoformat(), "original": value}
except Exception:
continue
return {"date": None, "original": value}
# Attempt to parse various date formats
# Add more robust date parsing logic here as needed
normalized_data[key] = datetime.fromisoformat(value).date().isoformat()
except ValueError:
normalized_data[key] = value # Keep original if parsing fails
elif "amount" in key.lower() and isinstance(value, str):
# Example: Strip currency symbols and thousands separators, then convert to float
try:
normalized_data[key] = float(value.replace("£", "").replace(",", ""))
except ValueError:
normalized_data[key] = value
else:
normalized_data[key] = value
return normalized_data
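As a rough illustration of the normalization above (field names and values are made up):

# Hypothetical extracted fields from svc-extract
extracted = {
    "invoice_date": "2024-05-01",
    "total_amount": "£1,250.00",
    "supplier_name": "Acme Ltd",
}
# _normalize_data would yield approximately:
# {"invoice_date": "2024-05-01", "total_amount": 1250.0, "supplier_name": "Acme Ltd"}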
def _normalize_name(value: str) -> dict[str, Any]:
"""Normalize person/company name"""
if not value:
return {"name": None, "original": value}
async def _map_to_kg_ontology(
doc_id: str,
tenant_id: str,
normalized_data: dict[str, Any],
provenance: list[dict[str, Any]],
) -> dict[str, Any]:
"""Map normalized data to Knowledge Graph ontology nodes and relationships based on kg_schema.json"""
nodes = []
relationships = []
now = datetime.now(UTC).isoformat()
# Clean and title case
clean_name = str(value).strip().title()
# Create a Document node
doc_node_id = f"document_{doc_id}"
nodes.append(
{
"id": doc_node_id,
"type": "Document",
"properties": {
"node_type": "Document",
"doc_id": doc_id,
"kind": normalized_data.get("kind", "OtherSupportingDoc"),
"source": normalized_data.get("source", "manual_upload"),
"checksum": normalized_data.get("checksum", ""),
"valid_from": now,
"asserted_at": now,
# "source": "svc-normalize-map",
"extractor_version": "1.0.0",
},
}
)
# Detect if it's a company (contains Ltd, Limited, etc.)
company_indicators = ["Ltd", "Limited", "Plc", "Inc", "Corp", "Company"]
is_company = any(indicator in clean_name for indicator in company_indicators)
# Create a TaxpayerProfile node
taxpayer_id = normalized_data.get("taxpayer_id", "unknown_taxpayer")
taxpayer_node_id = f"taxpayer_{taxpayer_id}"
nodes.append(
{
"id": taxpayer_node_id,
"type": "TaxpayerProfile",
"properties": {
"node_type": "TaxpayerProfile",
"taxpayer_id": taxpayer_id,
"type": "Individual",
"valid_from": now,
"asserted_at": now,
"source": "svc-normalize-map",
"extractor_version": "1.0.0",
},
}
)
relationships.append(
{
"id": f"rel_document_to_taxpayer_{doc_id}",
"type": "BELONGS_TO",
"sourceId": doc_node_id,
"targetId": taxpayer_node_id,
"properties": {},
}
)
# Create IncomeItem/ExpenseItem nodes and Evidence nodes
item_type = (
"IncomeItem" if normalized_data.get("kind") == "invoice" else "ExpenseItem"
)
for field, value in normalized_data.items():
if field in ["total_amount", "net_amount", "vat_amount", "amount"]:
item_id = f"item_{ulid.new()}"
item_node_id = f"{item_type.lower()}_{item_id}"
# Create the financial item node (IncomeItem or ExpenseItem)
nodes.append(
{
"id": item_node_id,
"type": item_type,
"properties": {
"node_type": item_type,
"type": (
"self_employment"
if "invoice" in normalized_data.get("kind", "")
else "other"
),
"gross": value,
"currency": "GBP",
"description": normalized_data.get("description", field),
"valid_from": now,
"asserted_at": now,
"source": "svc-normalize-map",
"extractor_version": "1.0.0",
},
}
)
relationships.append(
{
"id": f"rel_taxpayer_has_{item_type.lower()}_{item_id}",
"type": (
"HAS_INCOME" if item_type == "IncomeItem" else "HAS_EXPENSE"
),
"sourceId": taxpayer_node_id,
"targetId": item_node_id,
"properties": {},
}
)
# Create an Evidence node linking the item to the document
prov = next((p for p in provenance if p["field"] == field), None)
if prov:
evidence_id = f"evidence_{item_id}"
nodes.append(
{
"id": evidence_id,
"type": "Evidence",
"properties": {
"node_type": "Evidence",
"snippet_id": evidence_id,
"doc_ref": doc_id,
"page": prov.get("page"),
"bbox": prov.get("bbox"),
"text_hash": "dummy_hash", # Placeholder
"ocr_confidence": prov.get("confidence"),
"extracted_text": str(value),
"valid_from": now,
"asserted_at": now,
"source": "svc-normalize-map",
"extractor_version": "1.0.0",
},
}
)
relationships.append(
{
"id": f"rel_item_supported_by_evidence_{item_id}",
"type": "SUPPORTED_BY",
"sourceId": item_node_id,
"targetId": evidence_id,
"properties": {},
}
)
return {
"name": clean_name,
"type": "company" if is_company else "person",
"original": value,
"nodes": nodes,
"relationships": relationships,
"document_id": doc_id,
"tenant_id": tenant_id,
}
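A rough sketch of the kg.upsert.ready payload shape this produces; ids and values are invented, not taken from a real document:

kg_upsert_payload = {
    "document_id": "01HXAMPLEDOC",
    "tenant_id": "tenant-a",
    "nodes": [
        {"id": "document_01HXAMPLEDOC", "type": "Document",
         "properties": {"node_type": "Document", "doc_id": "01HXAMPLEDOC", "kind": "invoice"}},
        {"id": "taxpayer_unknown_taxpayer", "type": "TaxpayerProfile",
         "properties": {"node_type": "TaxpayerProfile", "taxpayer_id": "unknown_taxpayer"}},
    ],
    "relationships": [
        {"id": "rel_document_to_taxpayer_01HXAMPLEDOC", "type": "BELONGS_TO",
         "sourceId": "document_01HXAMPLEDOC", "targetId": "taxpayer_unknown_taxpayer",
         "properties": {}},
    ],
}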
def _normalize_address(value: str) -> dict[str, Any]:
"""Normalize address"""
import re
if not value:
return {"address": None, "original": value}
clean_address = str(value).strip()
# Extract UK postcode
postcode_pattern = r"\b[A-Z]{1,2}\d[A-Z0-9]?\s*\d[A-Z]{2}\b"
postcode_match = re.search(postcode_pattern, clean_address, re.IGNORECASE)
postcode = postcode_match.group().upper() if postcode_match else None
return {"address": clean_address, "postcode": postcode, "original": value}
def _normalize_number(value: str) -> dict[str, Any]:
"""Normalize reference numbers"""
import re
if not value:
return {"number": None, "original": value}
# Remove spaces and special characters
clean_number = re.sub(r"[^\w]", "", str(value))
# Detect number type
number_type = "unknown"
if len(clean_number) == 10 and clean_number.isdigit():
number_type = "utr" # UTR is 10 digits
elif len(clean_number) == 8 and clean_number.isdigit():
number_type = "account_number"
elif re.match(r"^\d{6}$", clean_number):
number_type = "sort_code"
return {"number": clean_number, "type": number_type, "original": value}
def _normalize_text(value: str) -> dict[str, Any]:
"""Normalize general text"""
if not value:
return {"text": None, "original": value}
clean_text = str(value).strip()
return {"text": clean_text, "original": value}
async def _map_to_entities(
normalized_data: dict[str, Any], doc_id: str, tenant_id: str
) -> list[dict[str, Any]]:
"""Map normalized data to knowledge graph entities"""
entities = []
# Create document entity
doc_entity = {
"type": "Document",
"id": doc_id,
"properties": {
"doc_id": doc_id,
"tenant_id": tenant_id,
"processed_at": datetime.utcnow().isoformat(),
"source": "extraction",
"extractor_version": "1.0.0",
"valid_from": datetime.utcnow(),
"asserted_at": datetime.utcnow(),
},
}
entities.append(doc_entity)
# Map specific field types to entities
for field_name, normalized_value in normalized_data.items():
if isinstance(normalized_value, dict):
if "amount" in normalized_value and normalized_value["amount"] is not None:
# Create expense or income item
entity_type = (
"ExpenseItem" if "expense" in field_name.lower() else "IncomeItem"
)
entity = {
"type": entity_type,
"id": f"{entity_type.lower()}_{ulid.new()}",
"properties": {
"amount": normalized_value["amount"],
"currency": normalized_value["currency"],
"description": field_name,
"source": doc_id,
"extractor_version": "1.0.0",
"valid_from": datetime.utcnow(),
"asserted_at": datetime.utcnow(),
},
}
entities.append(entity)
elif "name" in normalized_value and normalized_value["name"] is not None:
# Create party entity
entity = {
"type": "Party",
"id": f"party_{ulid.new()}",
"properties": {
"name": normalized_value["name"],
"party_type": normalized_value.get("type", "unknown"),
"source": doc_id,
"extractor_version": "1.0.0",
"valid_from": datetime.utcnow(),
"asserted_at": datetime.utcnow(),
},
}
entities.append(entity)
return entities
async def _store_entities(
entities: list[dict[str, Any]], tenant_id: str
) -> list[dict[str, Any]]:
"""Store entities in knowledge graph"""
stored_entities = []
for entity in entities:
try:
# Create node in Neo4j
result = await neo4j_client.create_node(
label=entity["type"], properties=entity["properties"]
)
stored_entities.append(
{
"type": entity["type"],
"id": entity["id"],
"neo4j_id": result.get("id"),
"properties": entity["properties"],
}
)
logger.debug("Entity stored", type=entity["type"], id=entity["id"])
except Exception as e:
logger.error("Failed to store entity", entity=entity, error=str(e))
return stored_entities
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
"""Handle HTTP exceptions with RFC7807 format"""
@@ -579,8 +330,8 @@ async def http_exception_handler(request: Request, exc: HTTPException) -> JSONRe
status=exc.status_code,
detail=exc.detail,
instance=str(request.url),
trace_id="",
).dict(),
trace_id=getattr(request.state, "trace_id", None),
).model_dump(),
)
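For reference, the RFC 7807 style body produced by this handler carries roughly these fields (names taken from the ErrorResponse usage above; values are illustrative, and ErrorResponse may define additional fields):

problem_body = {
    "status": 404,
    "detail": "Extraction results not found",
    "instance": "http://svc-normalize-map:8000/normalize/01HXAMPLEDOC",
    "trace_id": None,  # populated from request.state.trace_id when available
}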

View File

@@ -1,37 +1 @@
# FastAPI and server
fastapi>=0.118.3
uvicorn[standard]>=0.37.0
pydantic>=2.12.0
# Service-specific dependencies
# Data normalization and cleaning
pandas>=2.3.3
numpy>=2.3.3
# Currency and exchange rates
forex-python>=1.9.2
babel>=2.17.0
# Date and time processing
python-dateutil>=2.9.0
pytz>=2025.2
# Text normalization
unidecode>=1.4.0
phonenumbers>=9.0.16
# Entity resolution and matching
recordlinkage>=0.16.0
fuzzywuzzy>=0.18.0
python-Levenshtein>=0.27.1
# Geographic data
geopy>=2.4.1
pycountry>=24.6.1
# Data validation
cerberus>=1.3.7
marshmallow>=4.0.1
# UK-specific utilities
uk-postcode-utils>=1.1
python-ulid

View File

@@ -7,13 +7,14 @@ import os
# Import shared libraries
import sys
from contextlib import asynccontextmanager
from datetime import datetime
from typing import Any, cast
import pytesseract
import structlog
import ulid
from fastapi import BackgroundTasks, Depends, HTTPException, Request
from fastapi import BackgroundTasks, Depends, FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes
from PIL import Image
@@ -78,6 +79,8 @@ settings: OCRSettings
async def init_dependencies(app_settings: OCRSettings) -> None:
"""Initialize service dependencies"""
global storage_client, document_storage, event_bus, settings, vision_processor
# Larger delay to ensure NATS is fully ready before attempting connection
await asyncio.sleep(10)
settings = app_settings
logger.info("Starting OCR service")
@@ -89,17 +92,35 @@ async def init_dependencies(app_settings: OCRSettings) -> None:
minio_client = create_minio_client(settings)
storage_client = StorageClient(minio_client)
document_storage = DocumentStorage(storage_client)
# Initialize event bus
event_bus = create_event_bus(settings)
if not event_bus:
raise HTTPException(status_code=500, detail="Event bus not initialized")
eb = event_bus
# mypy: event_bus is Optional, so use local alias after check
await eb.start()
# Subscribe to document ingestion events
await eb.subscribe(EventTopics.DOC_INGESTED, _handle_document_ingested)
# Initialize event bus with retry logic
max_retries = 20
delay = 5
for attempt in range(1, max_retries + 1):
logger.info(
"Attempting NATS connection", url=settings.nats_servers, attempt=attempt
)
event_bus = create_event_bus(settings)
if not event_bus:
raise HTTPException(status_code=500, detail="Event bus not initialized")
eb = event_bus
try:
# Attempt to start and subscribe
await eb.start()
await eb.subscribe(EventTopics.DOC_INGESTED, _handle_document_ingested)
logger.info("NATS connection established on attempt", attempt=attempt)
break
except Exception as e:
logger.error(
"Failed to connect to NATS, retrying",
attempt=attempt,
error=str(e),
)
if attempt == max_retries:
raise HTTPException(
status_code=500, detail="Failed to connect to NATS after retries"
)
await asyncio.sleep(delay)
delay *= 2 # exponential backoff
# Initialize shared OCRProcessor for vision strategy
try:
@@ -114,7 +135,26 @@ async def init_dependencies(app_settings: OCRSettings) -> None:
logger.info("OCR service started successfully")
# Create app and settings
async def shutdown_dependencies() -> None:
"""Shutdown service dependencies"""
logger.info("Shutting down OCR service")
eb = event_bus
if eb is not None:
await eb.stop()
logger.info("OCR service shutdown complete")
@asynccontextmanager
async def lifespan(app: FastAPI): # type: ignore
"""FastAPI lifespan event handler"""
# Startup
await init_dependencies(cast(OCRSettings, _settings))
yield
# Shutdown
await shutdown_dependencies()
# Create app and settings with lifespan
app, _settings = create_app(
service_name="svc-ocr",
title="Tax Agent OCR Service",
@@ -122,8 +162,8 @@ app, _settings = create_app(
settings_class=OCRSettings,
) # fmt: skip
# Initialize dependencies immediately
asyncio.run(init_dependencies(cast(OCRSettings, _settings)))
# Override app's lifespan
app.router.lifespan_context = lifespan
tracer = get_tracer("svc-ocr")
metrics = get_metrics()

View File

@@ -14,3 +14,12 @@ opencv-python-headless>=4.12.0.88 # Headless version is smaller
# Computer vision (torchvision not in base-ml)
torchvision>=0.23.0
# OpenTelemetry (required by libs/observability)
opentelemetry-api>=1.21.0
opentelemetry-sdk>=1.21.0
opentelemetry-exporter-otlp-proto-grpc>=1.21.0
opentelemetry-instrumentation-fastapi>=0.42b0
opentelemetry-instrumentation-httpx>=0.42b0
opentelemetry-instrumentation-psycopg2>=0.42b0
opentelemetry-instrumentation-redis>=0.42b0

View File

@@ -10,12 +10,15 @@ FROM ${REGISTRY}/${OWNER}/base-ml:${BASE_VERSION}
# Switch to root to install service-specific dependencies
USER root
RUN apt-get update && apt-get install -y build-essential
# Set working directory
WORKDIR /app
# Copy service-specific requirements and install
COPY libs/requirements-base.txt /tmp/libs-requirements.txt
COPY apps/svc_rag_indexer/requirements.txt /tmp/service-requirements.txt
RUN pip install --no-cache-dir -r /tmp/service-requirements.txt
RUN pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/service-requirements.txt
# Copy application code
COPY libs/ ./libs/
@@ -26,7 +29,7 @@ RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD curl -f http://localhost:8000/healthz || exit 1
# Expose port

View File

@@ -10,12 +10,15 @@ FROM ${REGISTRY}/${OWNER}/base-ml:${BASE_VERSION}
# Switch to root to install service-specific dependencies
USER root
RUN apt-get update && apt-get install -y build-essential
# Set working directory
WORKDIR /app
# Copy service-specific requirements and install
COPY libs/requirements-base.txt /tmp/libs-requirements.txt
COPY apps/svc_rag_retriever/requirements.txt /tmp/service-requirements.txt
RUN pip install --no-cache-dir -r /tmp/service-requirements.txt
RUN pip install --no-cache-dir -r /tmp/libs-requirements.txt -r /tmp/service-requirements.txt
# Copy application code
COPY libs/ ./libs/

View File

@@ -43,7 +43,7 @@ RUN chown -R appuser:appuser /app
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
CMD curl -f http://localhost:8000/healthz || exit 1
# Expose port

View File

@@ -17,6 +17,7 @@ from datetime import datetime
from decimal import Decimal
from typing import Any
import httpx
import structlog
import ulid
from fastapi import BackgroundTasks, Depends, HTTPException, Request
@@ -55,6 +56,9 @@ class ReasonSettings(BaseAppSettings):
max_income: float = 10000000.0 # £10M
max_expenses: float = 10000000.0 # £10M
# External services
coverage_service_url: str = "http://svc-coverage:8000"
# Create app and settings
app, settings = create_app(
@@ -67,6 +71,7 @@ app, settings = create_app(
# Global clients
neo4j_client: Neo4jClient | None = None
event_bus: EventBus | None = None
http_client: httpx.AsyncClient | None = None
tracer = get_tracer("svc-reason")
metrics = get_metrics()
@@ -74,7 +79,7 @@ metrics = get_metrics()
@app.on_event("startup")
async def startup_event() -> None:
"""Initialize service dependencies"""
global neo4j_client, event_bus
global neo4j_client, event_bus, http_client
logger.info("Starting reasoning service")
@@ -89,6 +94,9 @@ async def startup_event() -> None:
event_bus = create_event_bus(settings)
await event_bus.start() # fmt: skip# pyright: ignore[reportOptionalMemberAccess]
# Initialize HTTP client
http_client = httpx.AsyncClient()
# Subscribe to KG upsert events
await event_bus.subscribe(EventTopics.KG_UPSERTED, _handle_kg_upserted) # type: ignore
@@ -98,7 +106,7 @@ async def startup_event() -> None:
@app.on_event("shutdown")
async def shutdown_event() -> None:
"""Cleanup service dependencies"""
global neo4j_client, event_bus
global neo4j_client, event_bus, http_client
logger.info("Shutting down reasoning service")
@@ -108,6 +116,9 @@ async def shutdown_event() -> None:
if event_bus:
await event_bus.stop()
if http_client:
await http_client.aclose()
logger.info("Reasoning service shutdown complete")
@@ -259,41 +270,76 @@ async def get_calculation_results(
async def _handle_kg_upserted(topic: str, payload: EventPayload) -> None:
"""Handle KG upsert events for auto-calculation"""
"""Handle KG upsert events for auto-calculation and coverage check"""
data = payload.data
taxpayer_id = data.get("taxpayer_id")
tax_year = data.get("tax_year")
tenant_id = data.get("tenant_id")
if not taxpayer_id or not tax_year or not tenant_id:
logger.warning("Invalid KG upsert event data for coverage check", data=data)
return
# Trigger svc-coverage check
try:
data = payload.data
entities = data.get("entities", [])
tenant_id = data.get("tenant_id")
# Check if we have enough data for calculation
has_income = any(e.get("type") == "IncomeItem" for e in entities)
has_expenses = any(e.get("type") == "ExpenseItem" for e in entities)
if has_income or has_expenses:
if http_client:
coverage_url = f"{settings.coverage_service_url}/v1/coverage/check"
request_body = {
"tax_year": tax_year,
"taxpayer_id": taxpayer_id,
}
headers = {
"X-Tenant-ID": tenant_id,
# current_user is not directly available in this event handler, so a
# system user token would need to be generated. X-Authenticated-User is
# omitted here for simplicity; in a real system it should be set securely.
}
response = await http_client.post(coverage_url, json=request_body, headers=headers)
response.raise_for_status()
coverage_report = response.json()
logger.info(
"Auto-triggering calculation due to new financial data",
tenant_id=tenant_id,
"Triggered svc-coverage check",
taxpayer_id=taxpayer_id,
tax_year=tax_year,
coverage_status=coverage_report.get("overall_status"),
)
# Find taxpayer ID from entities
taxpayer_id = None
for entity in entities:
if entity.get("type") == "TaxpayerProfile":
taxpayer_id = entity.get("id")
break
if taxpayer_id:
# If coverage is complete, trigger calculation
if coverage_report.get("overall_status") == "complete":
logger.info(
"Coverage complete, auto-triggering calculation",
taxpayer_id=taxpayer_id,
tax_year=tax_year,
)
await _compute_schedule_async(
tax_year=settings.current_tax_year,
tax_year=tax_year,
taxpayer_id=taxpayer_id,
schedule_id="SA103", # Default to self-employment
tenant_id=tenant_id or "",
tenant_id=tenant_id,
calculation_id=str(ulid.new()),
actor=payload.actor,
)
else:
logger.info(
"Coverage incomplete, not triggering calculation",
taxpayer_id=taxpayer_id,
tax_year=tax_year,
blocking_items=coverage_report.get("blocking_items"),
)
except httpx.HTTPStatusError as e:
logger.error(
"Failed to trigger svc-coverage check due to HTTP error",
taxpayer_id=taxpayer_id,
tax_year=tax_year,
error=str(e),
response_status_code=e.response.status_code,
response_text=e.response.text,
)
except Exception as e:
logger.error("Failed to handle KG upsert for auto-calculation", error=str(e))
logger.error("Failed to handle KG upsert for auto-calculation or coverage check", error=str(e))
async def _compute_schedule_async(
@@ -570,16 +616,107 @@ async def _compute_sa105(
async def _compute_sa100(
financial_data: dict[str, Any], tax_year: str
) -> tuple[dict[str, Any], list[dict[str, Any]]]:
"""Compute SA100 (Main return) schedule"""
# This would aggregate from other schedules
# For now, return basic structure
form_boxes = {
"1": {"value": "John Doe", "description": "Your name", "confidence": 0.9}
}
"""Compute SA100 (Main return) schedule by aggregating other schedules"""
form_boxes = {}
evidence_trail: list[dict[str, Any]] = []
taxpayer_id = financial_data.get("taxpayer_id")
tenant_id = financial_data.get("tenant_id") # Assuming tenant_id is passed in financial_data
if not taxpayer_id or not tenant_id:
raise ValueError("Taxpayer ID or Tenant ID missing for SA100 computation")
# Get latest SA103 calculation
sa103_query = """
MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_CALCULATION]->(c:Calculation)
WHERE c.schedule = 'SA103' AND c.tax_year = $tax_year AND c.retracted_at IS NULL
OPTIONAL MATCH (c)-[:HAS_BOX]->(b:FormBox)
RETURN c.calculation_id AS calculation_id, c.calculated_at AS calculated_at, COLLECT({box: b.box, value: b.value, description: b.description, confidence: b.confidence}) AS form_boxes
ORDER BY c.calculated_at DESC
LIMIT 1
"""
sa103_results = await neo4j_client.run_query( # type: ignore
sa103_query, {"taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "tax_year": tax_year}
)
sa103_calc = sa103_results[0] if sa103_results else None
sa103_net_profit = Decimal("0")
if sa103_calc and sa103_calc["form_boxes"]:
for box in sa103_calc["form_boxes"]:
if box["box"] == "32": # Net profit box in SA103
sa103_net_profit = Decimal(str(box["value"]))
form_boxes["SA103_32"] = {"value": float(sa103_net_profit), "description": "SA103 Net Profit", "confidence": box.get("confidence", 0.9)}
evidence_trail.append({
"box": "SA103_32",
"source_calculation_id": sa103_calc["calculation_id"],
"description": "Derived from SA103 Net Profit"
})
break
# Get latest SA105 calculation
sa105_query = """
MATCH (t:TaxpayerProfile {taxpayer_id: $taxpayer_id, tenant_id: $tenant_id})-[:HAS_CALCULATION]->(c:Calculation)
WHERE c.schedule = 'SA105' AND c.tax_year = $tax_year AND c.retracted_at IS NULL
OPTIONAL MATCH (c)-[:HAS_BOX]->(b:FormBox)
RETURN c.calculation_id AS calculation_id, c.calculated_at AS calculated_at, COLLECT({box: b.box, value: b.value, description: b.description, confidence: b.confidence}) AS form_boxes
ORDER BY c.calculated_at DESC
LIMIT 1
"""
sa105_results = await neo4j_client.run_query( # type: ignore
sa105_query, {"taxpayer_id": taxpayer_id, "tenant_id": tenant_id, "tax_year": tax_year}
)
sa105_calc = sa105_results[0] if sa105_results else None
sa105_net_income = Decimal("0")
if sa105_calc and sa105_calc["form_boxes"]:
for box in sa105_calc["form_boxes"]:
if box["box"] == "net_income": # Net property income box in SA105 (custom box for internal calculation)
sa105_net_income = Decimal(str(box["value"]))
form_boxes["SA105_net_income"] = {"value": float(sa105_net_income), "description": "SA105 Net Property Income", "confidence": box.get("confidence", 0.9)}
evidence_trail.append({
"box": "SA105_net_income",
"source_calculation_id": sa105_calc["calculation_id"],
"description": "Derived from SA105 Net Property Income"
})
break
# Aggregate total income for SA100
total_income = sa103_net_profit + sa105_net_income
form_boxes["SA100_total_income"] = {
"value": float(total_income),
"description": "Total income from all sources",
"confidence": 0.95 # Higher confidence for aggregated value
}
evidence_trail.append({
"box": "SA100_total_income",
"derived_from": ["SA103_32", "SA105_net_income"],
"description": "Aggregated from SA103 net profit and SA105 net property income"
})
# Example: Basic personal allowance (simplified)
personal_allowance = Decimal("12570") # For 2023-24
if total_income > Decimal("100000"): # Tapering not implemented here
personal_allowance = Decimal("0")
form_boxes["SA100_personal_allowance"] = {
"value": float(personal_allowance),
"description": "Personal Allowance",
"confidence": 0.99
}
evidence_trail.append({
"box": "SA100_personal_allowance",
"source": "HMRC_guidance",
"description": f"Standard personal allowance for {tax_year}"
})
# Placeholder for actual SA100 boxes and complex calculations
# This would involve detailed tax band calculations, reliefs, etc.
# For now, we'll just show the aggregation.
form_boxes["1"] = {"value": "John Doe (Aggregated)", "description": "Your name", "confidence": 0.9}
return form_boxes, evidence_trail
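A worked example of the aggregation above, using invented figures and the simplified allowance rule from the code (2023-24 allowance, zeroed above £100,000 with no tapering):

from decimal import Decimal

sa103_net_profit = Decimal("42000")   # illustrative SA103 box 32
sa105_net_income = Decimal("8000")    # illustrative SA105 net property income
total_income = sa103_net_profit + sa105_net_income  # 50000
personal_allowance = Decimal("12570") if total_income <= Decimal("100000") else Decimal("0")
# SA100_total_income -> 50000.0, SA100_personal_allowance -> 12570.0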

View File

@@ -33,3 +33,4 @@ jinja2>=3.1.6
# Statistical calculations
scipy>=1.16.2
httpx