Files
ai-tax-agent/tests/unit/test_event_schemas.py
harkon fdba81809f
Some checks failed
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
completed local setup with compose
2025-11-26 13:17:17 +00:00

501 lines
17 KiB
Python

"""Tests for event schema validation."""
import pytest
from pydantic import ValidationError
from libs.events.topics import EventTopics
from libs.schemas.events import (
EVENT_SCHEMA_MAP,
CalculationReadyEventData,
DocumentExtractedEventData,
DocumentIngestedEventData,
DocumentOCRReadyEventData,
FirmSyncCompletedEventData,
FormFilledEventData,
HMRCSubmittedEventData,
KGUpsertedEventData,
KGUpsertReadyEventData,
RAGIndexedEventData,
ReviewCompletedEventData,
ReviewRequestedEventData,
get_schema_for_topic,
validate_event_data,
)
class TestDocumentIngestedEventData:
    """Exercise construction and validation of ``DocumentIngestedEventData``."""

    def test_valid_event(self) -> None:
        """A fully populated payload is accepted and its fields read back."""
        doc_id = "01H8Y9Z5M3K7N2P4Q6R8T0V1W3"
        size = 102400

        event = DocumentIngestedEventData(
            doc_id=doc_id,
            kind="invoice",
            source="manual_upload",
            filename="invoice_2024.pdf",
            mime_type="application/pdf",
            size_bytes=size,
            checksum_sha256="a" * 64,
            storage_path="raw-documents/2024/invoice_2024.pdf",
        )

        assert event.doc_id == doc_id
        assert event.size_bytes == size
        assert len(event.checksum_sha256) == 64

    def test_invalid_checksum(self) -> None:
        """A checksum that is too short is rejected with the expected message."""
        # ``match`` searches str(exception) for the message, exactly like an
        # explicit ``in str(exc_info.value)`` check after the block.
        with pytest.raises(
            ValidationError, match="Invalid SHA-256 checksum format"
        ):
            DocumentIngestedEventData(
                doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
                kind="invoice",
                source="manual_upload",
                filename="test.pdf",
                mime_type="application/pdf",
                size_bytes=1024,
                checksum_sha256="invalid",  # far fewer than 64 characters
                storage_path="path/to/file",
            )

    def test_negative_size(self) -> None:
        """A negative ``size_bytes`` fails validation."""
        with pytest.raises(ValidationError):
            DocumentIngestedEventData(
                doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
                kind="invoice",
                source="manual_upload",
                filename="test.pdf",
                mime_type="application/pdf",
                size_bytes=-1,  # the only invalid value in this payload
                checksum_sha256="a" * 64,
                storage_path="path/to/file",
            )

    def test_immutable(self) -> None:
        """Assigning to a field of an existing event raises ``ValidationError``."""
        event = DocumentIngestedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            kind="invoice",
            source="manual_upload",
            filename="test.pdf",
            mime_type="application/pdf",
            size_bytes=1024,
            checksum_sha256="a" * 64,
            storage_path="path/to/file",
        )

        # The schema is expected to be frozen, so mutation must be refused.
        with pytest.raises(ValidationError):
            event.filename = "changed.pdf"
class TestDocumentOCRReadyEventData:
    """Exercise construction and validation of ``DocumentOCRReadyEventData``."""

    def test_valid_event(self) -> None:
        """A complete OCR-ready payload is accepted and its fields read back."""
        engine = "tesseract"
        confidence = 0.95

        event = DocumentOCRReadyEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            ocr_engine=engine,
            confidence_avg=confidence,
            page_count=3,
            text_length=5000,
            languages_detected=["en"],
            layout_detected=True,
            processing_time_ms=1500,
            storage_path="ocr-results/doc_123.json",
        )

        assert event.ocr_engine == engine
        assert event.confidence_avg == confidence
        assert 0.0 <= event.confidence_avg <= 1.0

    def test_invalid_confidence(self) -> None:
        """An average confidence above 1.0 fails validation."""
        with pytest.raises(ValidationError):
            DocumentOCRReadyEventData(
                doc_id="123",
                ocr_engine="tesseract",
                confidence_avg=1.5,  # out of range: greater than 1.0
                page_count=1,
                text_length=100,
                layout_detected=True,
                processing_time_ms=1000,
                storage_path="path",
            )

    def test_invalid_ocr_engine(self) -> None:
        """An OCR engine name outside the allowed values fails validation."""
        with pytest.raises(ValidationError):
            DocumentOCRReadyEventData(
                doc_id="123",
                ocr_engine="invalid_engine",  # not one of the accepted engines
                confidence_avg=0.9,
                page_count=1,
                text_length=100,
                layout_detected=True,
                processing_time_ms=1000,
                storage_path="path",
            )
class TestDocumentExtractedEventData:
    """Exercise construction of ``DocumentExtractedEventData``."""

    def test_valid_event(self) -> None:
        """A model-backed extraction payload is accepted."""
        strategy = "hybrid"
        model = "gpt-4"

        event = DocumentExtractedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            extraction_id="extr_123",
            strategy=strategy,
            model_name=model,
            fields_extracted=15,
            confidence_avg=0.88,
            calibrated_confidence=0.91,
            processing_time_ms=3000,
            storage_path="extractions/extr_123.json",
        )

        assert event.strategy == strategy
        assert event.model_name == model

    def test_valid_without_model(self) -> None:
        """A rules-based extraction may carry ``model_name=None``."""
        event = DocumentExtractedEventData(
            doc_id="123",
            extraction_id="extr_456",
            strategy="rules",
            model_name=None,  # rules-based extraction involves no model
            fields_extracted=10,
            confidence_avg=0.95,
            calibrated_confidence=0.93,
            processing_time_ms=500,
            storage_path="path",
        )

        assert event.model_name is None
        assert event.strategy == "rules"
class TestKGEvents:
    """Exercise the Knowledge Graph event schemas."""

    def test_kg_upsert_ready(self) -> None:
        """A KG upsert-ready payload is accepted and its fields read back."""
        tax_year = "2024-25"
        entities = 25

        event = KGUpsertReadyEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            taxpayer_id="TP-001",
            tax_year=tax_year,
            normalization_id="norm_123",
            entity_count=entities,
            relationship_count=40,
            storage_path="normalized/norm_123.json",
        )

        assert event.entity_count == entities
        assert event.tax_year == tax_year

    def test_kg_upserted(self) -> None:
        """A successful upsert with no SHACL violations is accepted."""
        event = KGUpsertedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            success=True,
            error_message=None,
            shacl_violations=0,
            entities_created=10,
            entities_updated=5,
            relationships_created=20,
            relationships_updated=10,
            processing_time_ms=2000,
        )

        assert event.success is True
        assert event.shacl_violations == 0

    def test_kg_upserted_with_violations(self) -> None:
        """A failed upsert carries its violation count and an error message."""
        violations = 3

        event = KGUpsertedEventData(
            doc_id="123",
            success=False,
            error_message="SHACL validation failed: Missing required property",
            shacl_violations=violations,
            entities_created=5,
            entities_updated=0,
            relationships_created=8,
            relationships_updated=0,
            processing_time_ms=1500,
        )

        assert event.success is False
        assert event.shacl_violations == violations
        assert event.error_message is not None
class TestRAGIndexedEventData:
    """Exercise construction of ``RAGIndexedEventData``."""

    def test_valid_event(self) -> None:
        """A complete RAG-indexed payload is accepted and its fields read back."""
        chunk_total = 45

        event = RAGIndexedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            collection_name="firm_knowledge",
            embedding_model="bge-small-en-v1.5",
            chunks_indexed=chunk_total,
            pii_detected=True,
            pii_redacted=True,
            processing_time_ms=5000,
            storage_path="chunks/doc_123.json",
        )

        assert event.pii_detected is True
        assert event.pii_redacted is True
        assert event.chunks_indexed == chunk_total
class TestCalculationReadyEventData:
    """Exercise construction of ``CalculationReadyEventData``."""

    def test_valid_event(self) -> None:
        """A calculation payload with both totals is accepted."""
        schedule = "SA103"
        income = 85000.50
        tax = 18500.25

        event = CalculationReadyEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            schedule_id=schedule,
            calculation_id="calc_789",
            total_income=income,
            total_tax=tax,
            boxes_computed=50,
            evidence_count=15,
            confidence=0.92,
            processing_time_ms=2500,
            storage_path="calculations/calc_789.json",
        )

        assert event.schedule_id == schedule
        assert event.total_income == income
        assert event.total_tax == tax

    def test_valid_without_totals(self) -> None:
        """A partial calculation may leave both totals as ``None``."""
        event = CalculationReadyEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            schedule_id="SA102",
            calculation_id="calc_456",
            total_income=None,
            total_tax=None,
            boxes_computed=20,
            evidence_count=10,
            confidence=0.85,
            processing_time_ms=1000,
            storage_path="calculations/calc_456.json",
        )

        assert event.total_income is None
        assert event.total_tax is None
class TestFormFilledEventData:
    """Exercise construction of ``FormFilledEventData``."""

    def test_valid_event(self) -> None:
        """A complete form-filled payload is accepted and its fields read back."""
        form = "SA100"

        event = FormFilledEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            form_id=form,
            fields_filled=75,
            pdf_size_bytes=524288,
            checksum_sha256="b" * 64,
            storage_path="forms/SA100_filled.pdf",
            evidence_bundle_path="evidence/bundle_123.zip",
        )

        assert event.form_id == form
        assert event.evidence_bundle_path is not None
class TestHMRCSubmittedEventData:
    """Exercise construction and validation of ``HMRCSubmittedEventData``."""

    def test_successful_submission(self) -> None:
        """A successful sandbox submission carries an HMRC reference."""
        event = HMRCSubmittedEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            submission_id="sub_999",
            submission_type="sandbox",
            hmrc_reference="HMRC-REF-12345",
            status_code=200,
            success=True,
            error_message=None,
            processing_time_ms=3000,
        )

        assert event.success is True
        assert event.hmrc_reference is not None

    def test_failed_submission(self) -> None:
        """A failed live submission carries an error message and no reference."""
        event = HMRCSubmittedEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            submission_id="sub_888",
            submission_type="live",
            hmrc_reference=None,
            status_code=400,
            success=False,
            error_message="Invalid UTR number",
            processing_time_ms=1500,
        )

        assert event.success is False
        assert event.error_message is not None

    def test_invalid_submission_type(self) -> None:
        """A submission type outside the allowed values fails validation."""
        with pytest.raises(ValidationError):
            HMRCSubmittedEventData(
                taxpayer_id="TP-001",
                tax_year="2024-25",
                submission_id="sub_777",
                submission_type="invalid",  # not one of the accepted types
                hmrc_reference=None,
                status_code=None,
                success=False,
                error_message=None,
                processing_time_ms=1000,
            )
class TestReviewEvents:
    """Exercise the review request and review completion event schemas."""

    def test_review_requested(self) -> None:
        """A review-requested payload is accepted and its fields read back."""
        kind = "extraction"
        urgency = "high"

        event = ReviewRequestedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            review_type=kind,
            priority=urgency,
            reason="Low confidence extraction (0.65)",
            assigned_to="reviewer@example.com",
            due_date="2024-12-01T10:00:00Z",
            metadata={"extraction_id": "extr_123"},
        )

        assert event.priority == urgency
        assert event.review_type == kind

    def test_review_completed(self) -> None:
        """A review-completed payload is accepted and its fields read back."""
        outcome = "approved"
        edits = 3

        event = ReviewCompletedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            review_id="rev_456",
            reviewer="reviewer@example.com",
            decision=outcome,
            changes_made=edits,
            comments="Fixed vendor name and amount",
            review_duration_seconds=180,
        )

        assert event.decision == outcome
        assert event.changes_made == edits
class TestFirmSyncCompletedEventData:
    """Exercise construction of ``FirmSyncCompletedEventData``."""

    def test_successful_sync(self) -> None:
        """A clean sync reports success and zero failed records."""
        event = FirmSyncCompletedEventData(
            firm_id="FIRM-001",
            connector_type="xero",
            sync_id="sync_123",
            success=True,
            error_message=None,
            records_synced=150,
            records_created=50,
            records_updated=100,
            records_failed=0,
            processing_time_ms=10000,
        )

        assert event.success is True
        assert event.records_failed == 0

    def test_partial_sync_failure(self) -> None:
        """A sync may be flagged successful while still reporting failures."""
        failed = 10

        event = FirmSyncCompletedEventData(
            firm_id="FIRM-002",
            connector_type="sage",
            sync_id="sync_456",
            success=True,  # overall success even though some records failed
            error_message="10 records failed validation",
            records_synced=90,
            records_created=30,
            records_updated=60,
            records_failed=failed,
            processing_time_ms=15000,
        )

        assert event.records_failed == failed
        assert event.error_message is not None
class TestSchemaMapping:
    """Exercise the topic-to-schema map and its lookup/validation helpers."""

    def test_all_topics_have_schemas(self) -> None:
        """Every public ``EventTopics`` attribute value is a schema-map key."""
        topic_values = set()
        for attr in dir(EventTopics):
            # Skip private and dunder attributes; keep everything else.
            if attr.startswith("_"):
                continue
            topic_values.add(getattr(EventTopics, attr))

        # Anything left after removing the mapped topics has no schema.
        missing_schemas = topic_values.difference(EVENT_SCHEMA_MAP.keys())
        assert not missing_schemas, f"Missing schemas for topics: {missing_schemas}"

    def test_validate_event_data(self) -> None:
        """``validate_event_data`` returns the schema instance for the topic."""
        doc_id = "01H8Y9Z5M3K7N2P4Q6R8T0V1W3"
        payload = {
            "doc_id": doc_id,
            "kind": "invoice",
            "source": "manual_upload",
            "filename": "test.pdf",
            "mime_type": "application/pdf",
            "size_bytes": 1024,
            "checksum_sha256": "a" * 64,
            "storage_path": "path/to/file",
        }

        validated = validate_event_data("doc.ingested", payload)

        assert isinstance(validated, DocumentIngestedEventData)
        assert validated.doc_id == doc_id

    def test_validate_unknown_topic(self) -> None:
        """An unregistered topic makes ``validate_event_data`` raise ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown event topic"):
            validate_event_data("unknown.topic", {})

    def test_validate_invalid_data(self) -> None:
        """A payload lacking required fields fails validation."""
        # Only two fields are supplied; the rest are deliberately omitted.
        incomplete = {"doc_id": "123", "filename": "test.pdf"}

        with pytest.raises(ValidationError):
            validate_event_data("doc.ingested", incomplete)

    def test_get_schema_for_topic(self) -> None:
        """``get_schema_for_topic`` returns the schema class for a known topic."""
        resolved = get_schema_for_topic("doc.ingested")
        assert resolved == DocumentIngestedEventData

    def test_get_schema_unknown_topic(self) -> None:
        """An unregistered topic makes ``get_schema_for_topic`` raise ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown event topic"):
            get_schema_for_topic("unknown.topic")

    def test_schema_prevents_extra_fields(self) -> None:
        """An undeclared field is rejected (the schema is expected to forbid extras)."""
        # ``match`` searches str(exception) for the message, exactly like an
        # explicit ``in str(exc_info.value)`` check after the block.
        with pytest.raises(
            ValidationError, match="Extra inputs are not permitted"
        ):
            DocumentIngestedEventData(
                doc_id="123",
                kind="invoice",
                source="manual_upload",
                filename="test.pdf",
                mime_type="application/pdf",
                size_bytes=1024,
                checksum_sha256="a" * 64,
                storage_path="path",
                unexpected_field="should_fail",  # not declared on the schema
            )