Some checks failed
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
501 lines
17 KiB
Python
501 lines
17 KiB
Python
"""Tests for event schema validation."""
|
|
|
|
import pytest
|
|
from pydantic import ValidationError
|
|
|
|
from libs.events.topics import EventTopics
|
|
from libs.schemas.events import (
|
|
EVENT_SCHEMA_MAP,
|
|
CalculationReadyEventData,
|
|
DocumentExtractedEventData,
|
|
DocumentIngestedEventData,
|
|
DocumentOCRReadyEventData,
|
|
FirmSyncCompletedEventData,
|
|
FormFilledEventData,
|
|
HMRCSubmittedEventData,
|
|
KGUpsertedEventData,
|
|
KGUpsertReadyEventData,
|
|
RAGIndexedEventData,
|
|
ReviewCompletedEventData,
|
|
ReviewRequestedEventData,
|
|
get_schema_for_topic,
|
|
validate_event_data,
|
|
)
|
|
|
|
|
|
class TestDocumentIngestedEventData:
    """Construction and validation checks for ``DocumentIngestedEventData``."""

    def test_valid_event(self) -> None:
        """A fully populated payload is accepted and exposes the given values."""
        doc_id = "01H8Y9Z5M3K7N2P4Q6R8T0V1W3"
        size_bytes = 102400
        event = DocumentIngestedEventData(
            doc_id=doc_id,
            filename="invoice_2024.pdf",
            mime_type="application/pdf",
            size_bytes=size_bytes,
            checksum_sha256="a" * 64,
            kind="invoice",
            source="manual_upload",
            storage_path="raw-documents/2024/invoice_2024.pdf",
        )

        assert event.doc_id == doc_id
        assert event.size_bytes == size_bytes
        assert len(event.checksum_sha256) == 64

    def test_invalid_checksum(self) -> None:
        """A checksum much shorter than 64 characters is rejected with a message."""
        with pytest.raises(ValidationError) as raised:
            DocumentIngestedEventData(
                doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
                filename="test.pdf",
                mime_type="application/pdf",
                size_bytes=1024,
                checksum_sha256="invalid",  # deliberately too short
                kind="invoice",
                source="manual_upload",
                storage_path="path/to/file",
            )

        # Checked after the context manager exits so the assertion actually runs.
        assert "Invalid SHA-256 checksum format" in str(raised.value)

    def test_negative_size(self) -> None:
        """A negative ``size_bytes`` is rejected."""
        negative_size = -1
        with pytest.raises(ValidationError):
            DocumentIngestedEventData(
                doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
                filename="test.pdf",
                mime_type="application/pdf",
                size_bytes=negative_size,
                checksum_sha256="a" * 64,
                kind="invoice",
                source="manual_upload",
                storage_path="path/to/file",
            )

    def test_immutable(self) -> None:
        """Assigning to a field of an existing event raises ``ValidationError``."""
        event = DocumentIngestedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            filename="test.pdf",
            mime_type="application/pdf",
            size_bytes=1024,
            checksum_sha256="a" * 64,
            kind="invoice",
            source="manual_upload",
            storage_path="path/to/file",
        )

        with pytest.raises(ValidationError):
            # The schema is expected to be frozen, so mutation must be refused.
            event.filename = "changed.pdf"
|
|
|
|
|
|
class TestDocumentOCRReadyEventData:
    """Test DocumentOCRReadyEventData schema."""

    def test_valid_event(self) -> None:
        """Test creating a valid OCR ready event."""
        data = DocumentOCRReadyEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            ocr_engine="tesseract",
            page_count=3,
            confidence_avg=0.95,
            text_length=5000,
            layout_detected=True,
            languages_detected=["en"],
            processing_time_ms=1500,
            storage_path="ocr-results/doc_123.json",
        )
        assert data.ocr_engine == "tesseract"
        assert data.confidence_avg == 0.95
        assert 0.0 <= data.confidence_avg <= 1.0

    def test_invalid_confidence(self) -> None:
        """Test that a confidence score above 1.0 is rejected.

        ``doc_id`` and ``languages_detected`` carry the same values as in
        ``test_valid_event`` so that a missing or differently shaped unrelated
        field is less likely to be what triggers the error. The reported error
        locations are then checked to confirm ``confidence_avg`` itself failed.
        """
        with pytest.raises(ValidationError) as exc_info:
            DocumentOCRReadyEventData(
                doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
                ocr_engine="tesseract",
                page_count=1,
                confidence_avg=1.5,  # > 1.0
                text_length=100,
                layout_detected=True,
                languages_detected=["en"],
                processing_time_ms=1000,
                storage_path="path",
            )

        # Only the first element of each location is compared: nested or union
        # types may append further path segments after the field name.
        failed_fields = {
            error["loc"][0] for error in exc_info.value.errors() if error["loc"]
        }
        assert "confidence_avg" in failed_fields

    def test_invalid_ocr_engine(self) -> None:
        """Test that an OCR engine outside the allowed values is rejected.

        As in ``test_invalid_confidence``, the unrelated fields mirror the valid
        event where the original test differed from it, and the error locations
        are checked so the test cannot pass because of some other field.
        """
        with pytest.raises(ValidationError) as exc_info:
            DocumentOCRReadyEventData(
                doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
                ocr_engine="invalid_engine",  # Not in allowed values
                page_count=1,
                confidence_avg=0.9,
                text_length=100,
                layout_detected=True,
                languages_detected=["en"],
                processing_time_ms=1000,
                storage_path="path",
            )

        failed_fields = {
            error["loc"][0] for error in exc_info.value.errors() if error["loc"]
        }
        assert "ocr_engine" in failed_fields
|
|
|
|
|
|
class TestDocumentExtractedEventData:
    """Construction checks for ``DocumentExtractedEventData``."""

    def test_valid_event(self) -> None:
        """An extraction event that names a model is accepted."""
        model_name = "gpt-4"
        event = DocumentExtractedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            extraction_id="extr_123",
            strategy="hybrid",
            fields_extracted=15,
            confidence_avg=0.88,
            calibrated_confidence=0.91,
            model_name=model_name,
            processing_time_ms=3000,
            storage_path="extractions/extr_123.json",
        )

        assert event.strategy == "hybrid"
        assert event.model_name == model_name

    def test_valid_without_model(self) -> None:
        """``model_name`` may be ``None``; the ``rules`` strategy is used here."""
        event = DocumentExtractedEventData(
            doc_id="123",
            extraction_id="extr_456",
            strategy="rules",
            fields_extracted=10,
            confidence_avg=0.95,
            calibrated_confidence=0.93,
            model_name=None,  # no model is supplied for this case
            processing_time_ms=500,
            storage_path="path",
        )

        assert event.model_name is None
        assert event.strategy == "rules"
|
|
|
|
|
|
class TestKGEvents:
    """Construction checks for the knowledge-graph event schemas."""

    def test_kg_upsert_ready(self) -> None:
        """A ``KGUpsertReadyEventData`` payload is accepted and readable."""
        entity_count = 25
        tax_year = "2024-25"
        event = KGUpsertReadyEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            entity_count=entity_count,
            relationship_count=40,
            tax_year=tax_year,
            taxpayer_id="TP-001",
            normalization_id="norm_123",
            storage_path="normalized/norm_123.json",
        )

        assert event.entity_count == entity_count
        assert event.tax_year == tax_year

    def test_kg_upserted(self) -> None:
        """A successful upsert with zero SHACL violations is accepted."""
        event = KGUpsertedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            entities_created=10,
            entities_updated=5,
            relationships_created=20,
            relationships_updated=10,
            shacl_violations=0,
            processing_time_ms=2000,
            success=True,
            error_message=None,
        )

        assert event.success is True
        assert event.shacl_violations == 0

    def test_kg_upserted_with_violations(self) -> None:
        """A failed upsert carrying violations and an error message is accepted."""
        violation_count = 3
        event = KGUpsertedEventData(
            doc_id="123",
            entities_created=5,
            entities_updated=0,
            relationships_created=8,
            relationships_updated=0,
            shacl_violations=violation_count,
            processing_time_ms=1500,
            success=False,
            error_message="SHACL validation failed: Missing required property",
        )

        assert event.success is False
        assert event.shacl_violations == violation_count
        assert event.error_message is not None
|
|
|
|
|
|
class TestRAGIndexedEventData:
    """Construction checks for ``RAGIndexedEventData``."""

    def test_valid_event(self) -> None:
        """A fully populated indexing event is accepted and readable."""
        chunks_indexed = 45
        event = RAGIndexedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            collection_name="firm_knowledge",
            chunks_indexed=chunks_indexed,
            embedding_model="bge-small-en-v1.5",
            pii_detected=True,
            pii_redacted=True,
            processing_time_ms=5000,
            storage_path="chunks/doc_123.json",
        )

        assert event.pii_detected is True
        assert event.pii_redacted is True
        assert event.chunks_indexed == chunks_indexed
|
|
|
|
|
|
class TestCalculationReadyEventData:
    """Construction checks for ``CalculationReadyEventData``."""

    def test_valid_event(self) -> None:
        """A calculation event with both totals present is accepted."""
        total_income = 85000.50
        total_tax = 18500.25
        event = CalculationReadyEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            schedule_id="SA103",
            calculation_id="calc_789",
            boxes_computed=50,
            total_income=total_income,
            total_tax=total_tax,
            confidence=0.92,
            evidence_count=15,
            processing_time_ms=2500,
            storage_path="calculations/calc_789.json",
        )

        assert event.schedule_id == "SA103"
        assert event.total_income == total_income
        assert event.total_tax == total_tax

    def test_valid_without_totals(self) -> None:
        """Both totals may be ``None`` and are stored as ``None``."""
        event = CalculationReadyEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            schedule_id="SA102",
            calculation_id="calc_456",
            boxes_computed=20,
            total_income=None,
            total_tax=None,
            confidence=0.85,
            evidence_count=10,
            processing_time_ms=1000,
            storage_path="calculations/calc_456.json",
        )

        assert event.total_income is None
        assert event.total_tax is None
|
|
|
|
|
|
class TestFormFilledEventData:
    """Construction checks for ``FormFilledEventData``."""

    def test_valid_event(self) -> None:
        """A filled-form event with an evidence bundle path is accepted."""
        event = FormFilledEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            form_id="SA100",
            fields_filled=75,
            pdf_size_bytes=524288,
            storage_path="forms/SA100_filled.pdf",
            evidence_bundle_path="evidence/bundle_123.zip",
            checksum_sha256="b" * 64,
        )

        assert event.form_id == "SA100"
        assert event.evidence_bundle_path is not None
|
|
|
|
|
|
class TestHMRCSubmittedEventData:
    """Construction and validation checks for ``HMRCSubmittedEventData``."""

    def test_successful_submission(self) -> None:
        """A successful submission that carries an HMRC reference is accepted."""
        event = HMRCSubmittedEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            submission_id="sub_999",
            hmrc_reference="HMRC-REF-12345",
            submission_type="sandbox",
            success=True,
            status_code=200,
            error_message=None,
            processing_time_ms=3000,
        )

        assert event.success is True
        assert event.hmrc_reference is not None

    def test_failed_submission(self) -> None:
        """A failed submission with no reference but an error message is accepted."""
        event = HMRCSubmittedEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            submission_id="sub_888",
            hmrc_reference=None,
            submission_type="live",
            success=False,
            status_code=400,
            error_message="Invalid UTR number",
            processing_time_ms=1500,
        )

        assert event.success is False
        assert event.error_message is not None

    def test_invalid_submission_type(self) -> None:
        """A ``submission_type`` outside the allowed values is rejected."""
        with pytest.raises(ValidationError):
            HMRCSubmittedEventData(
                taxpayer_id="TP-001",
                tax_year="2024-25",
                submission_id="sub_777",
                hmrc_reference=None,
                submission_type="invalid",  # the value under test
                success=False,
                status_code=None,
                error_message=None,
                processing_time_ms=1000,
            )
|
|
|
|
|
|
class TestReviewEvents:
    """Construction checks for the review request and completion schemas."""

    def test_review_requested(self) -> None:
        """A review request with assignee, due date and metadata is accepted."""
        extra_context = {"extraction_id": "extr_123"}
        event = ReviewRequestedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            review_type="extraction",
            priority="high",
            reason="Low confidence extraction (0.65)",
            assigned_to="reviewer@example.com",
            due_date="2024-12-01T10:00:00Z",
            metadata=extra_context,
        )

        assert event.priority == "high"
        assert event.review_type == "extraction"

    def test_review_completed(self) -> None:
        """A completed review with a decision and a change count is accepted."""
        changes_made = 3
        event = ReviewCompletedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            review_id="rev_456",
            reviewer="reviewer@example.com",
            decision="approved",
            changes_made=changes_made,
            comments="Fixed vendor name and amount",
            review_duration_seconds=180,
        )

        assert event.decision == "approved"
        assert event.changes_made == changes_made
|
|
|
|
|
|
class TestFirmSyncCompletedEventData:
    """Construction checks for ``FirmSyncCompletedEventData``."""

    def test_successful_sync(self) -> None:
        """A sync that reports no failed records is accepted."""
        event = FirmSyncCompletedEventData(
            firm_id="FIRM-001",
            connector_type="xero",
            sync_id="sync_123",
            records_synced=150,
            records_created=50,
            records_updated=100,
            records_failed=0,
            success=True,
            error_message=None,
            processing_time_ms=10000,
        )

        assert event.success is True
        assert event.records_failed == 0

    def test_partial_sync_failure(self) -> None:
        """A sync may be flagged successful while still reporting failed records."""
        failed_records = 10
        event = FirmSyncCompletedEventData(
            firm_id="FIRM-002",
            connector_type="sage",
            sync_id="sync_456",
            records_synced=90,
            records_created=30,
            records_updated=60,
            records_failed=failed_records,
            success=True,  # overall success even though some records failed
            error_message="10 records failed validation",
            processing_time_ms=15000,
        )

        assert event.records_failed == failed_records
        assert event.error_message is not None
|
|
|
|
|
|
class TestSchemaMapping:
    """Checks for the topic-to-schema map and its lookup/validation helpers."""

    def test_all_topics_have_schemas(self) -> None:
        """Every public attribute value of ``EventTopics`` is a key of the map."""
        public_names = [attr for attr in dir(EventTopics) if not attr.startswith("_")]
        topic_values = {getattr(EventTopics, attr) for attr in public_names}

        # Anything left after removing the mapped topics has no schema.
        missing_schemas = topic_values.difference(EVENT_SCHEMA_MAP.keys())
        assert not missing_schemas, f"Missing schemas for topics: {missing_schemas}"

    def test_validate_event_data(self) -> None:
        """``validate_event_data`` returns the typed model for a valid payload."""
        doc_id = "01H8Y9Z5M3K7N2P4Q6R8T0V1W3"
        payload = {
            "doc_id": doc_id,
            "filename": "test.pdf",
            "mime_type": "application/pdf",
            "size_bytes": 1024,
            "checksum_sha256": "a" * 64,
            "kind": "invoice",
            "source": "manual_upload",
            "storage_path": "path/to/file",
        }

        validated = validate_event_data("doc.ingested", payload)

        assert isinstance(validated, DocumentIngestedEventData)
        assert validated.doc_id == doc_id

    def test_validate_unknown_topic(self) -> None:
        """An unrecognised topic makes ``validate_event_data`` raise ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown event topic"):
            validate_event_data("unknown.topic", {})

    def test_validate_invalid_data(self) -> None:
        """A payload with only two of the fields is rejected."""
        # Only two keys are given; the other fields are left out on purpose.
        partial_payload = {"doc_id": "123", "filename": "test.pdf"}

        with pytest.raises(ValidationError):
            validate_event_data("doc.ingested", partial_payload)

    def test_get_schema_for_topic(self) -> None:
        """``get_schema_for_topic`` returns the model class for a known topic."""
        looked_up = get_schema_for_topic("doc.ingested")
        assert looked_up == DocumentIngestedEventData

    def test_get_schema_unknown_topic(self) -> None:
        """An unrecognised topic makes ``get_schema_for_topic`` raise ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown event topic"):
            get_schema_for_topic("unknown.topic")

    def test_schema_prevents_extra_fields(self) -> None:
        """An undeclared keyword is rejected with the extra-inputs message."""
        with pytest.raises(ValidationError) as raised:
            DocumentIngestedEventData(
                doc_id="123",
                filename="test.pdf",
                mime_type="application/pdf",
                size_bytes=1024,
                checksum_sha256="a" * 64,
                kind="invoice",
                source="manual_upload",
                storage_path="path",
                unexpected_field="should_fail",  # not a declared field
            )

        # Checked after the context manager exits so the assertion actually runs.
        assert "Extra inputs are not permitted" in str(raised.value)
|