"""Tests for event schema validation."""

from typing import Any, Dict

import pytest
from pydantic import ValidationError

from libs.events.topics import EventTopics
from libs.schemas.events import (
    EVENT_SCHEMA_MAP,
    CalculationReadyEventData,
    DocumentExtractedEventData,
    DocumentIngestedEventData,
    DocumentOCRReadyEventData,
    FirmSyncCompletedEventData,
    FormFilledEventData,
    HMRCSubmittedEventData,
    KGUpsertedEventData,
    KGUpsertReadyEventData,
    RAGIndexedEventData,
    ReviewCompletedEventData,
    ReviewRequestedEventData,
    get_schema_for_topic,
    validate_event_data,
)


def _ingested_payload(**overrides: Any) -> Dict[str, Any]:
    """Return a document-ingested payload, with ``overrides`` applied.

    The base values are the ones ``TestDocumentIngestedEventData.test_valid_event``
    constructs successfully. Negative tests override a single field so that a
    ``ValidationError`` can only be caused by the field under test.
    """
    payload: Dict[str, Any] = {
        "doc_id": "01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
        "filename": "invoice_2024.pdf",
        "mime_type": "application/pdf",
        "size_bytes": 102400,
        "checksum_sha256": "a" * 64,
        "kind": "invoice",
        "source": "manual_upload",
        "storage_path": "raw-documents/2024/invoice_2024.pdf",
    }
    payload.update(overrides)
    return payload


def _ocr_ready_payload(**overrides: Any) -> Dict[str, Any]:
    """Return an OCR-ready payload, with ``overrides`` applied.

    The base values are the ones ``TestDocumentOCRReadyEventData.test_valid_event``
    constructs successfully.
    """
    payload: Dict[str, Any] = {
        "doc_id": "01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
        "ocr_engine": "tesseract",
        "page_count": 3,
        "confidence_avg": 0.95,
        "text_length": 5000,
        "layout_detected": True,
        "languages_detected": ["en"],
        "processing_time_ms": 1500,
        "storage_path": "ocr-results/doc_123.json",
    }
    payload.update(overrides)
    return payload


def _hmrc_submitted_payload(**overrides: Any) -> Dict[str, Any]:
    """Return an HMRC-submitted payload, with ``overrides`` applied.

    The base values are the ones
    ``TestHMRCSubmittedEventData.test_successful_submission`` constructs
    successfully.
    """
    payload: Dict[str, Any] = {
        "taxpayer_id": "TP-001",
        "tax_year": "2024-25",
        "submission_id": "sub_999",
        "hmrc_reference": "HMRC-REF-12345",
        "submission_type": "sandbox",
        "success": True,
        "status_code": 200,
        "error_message": None,
        "processing_time_ms": 3000,
    }
    payload.update(overrides)
    return payload


class TestDocumentIngestedEventData:
    """Test DocumentIngestedEventData schema."""

    def test_valid_event(self) -> None:
        """Test creating a valid document ingested event."""
        data = DocumentIngestedEventData(**_ingested_payload())

        assert data.doc_id == "01H8Y9Z5M3K7N2P4Q6R8T0V1W3"
        assert data.size_bytes == 102400
        assert len(data.checksum_sha256) == 64

    def test_invalid_checksum(self) -> None:
        """Test that a checksum that is too short is rejected."""
        with pytest.raises(ValidationError) as exc_info:
            DocumentIngestedEventData(
                **_ingested_payload(checksum_sha256="invalid")  # Too short
            )

        # Checked after the ``with`` block: statements following the raising
        # call inside the block would never execute.
        assert "Invalid SHA-256 checksum format" in str(exc_info.value)

    def test_negative_size(self) -> None:
        """Test negative file size validation."""
        with pytest.raises(ValidationError):
            DocumentIngestedEventData(**_ingested_payload(size_bytes=-1))

    def test_immutable(self) -> None:
        """Test that event data is immutable."""
        data = DocumentIngestedEventData(**_ingested_payload(filename="test.pdf"))

        with pytest.raises(ValidationError):
            data.filename = "changed.pdf"  # Should raise because frozen=True


class TestDocumentOCRReadyEventData:
    """Test DocumentOCRReadyEventData schema."""

    def test_valid_event(self) -> None:
        """Test creating a valid OCR ready event."""
        data = DocumentOCRReadyEventData(**_ocr_ready_payload())

        assert data.ocr_engine == "tesseract"
        assert data.confidence_avg == 0.95
        assert 0.0 <= data.confidence_avg <= 1.0

    def test_invalid_confidence(self) -> None:
        """Test that a confidence score above 1.0 is rejected."""
        with pytest.raises(ValidationError):
            DocumentOCRReadyEventData(
                **_ocr_ready_payload(confidence_avg=1.5)  # > 1.0
            )

    def test_invalid_ocr_engine(self) -> None:
        """Test that an OCR engine outside the allowed values is rejected."""
        with pytest.raises(ValidationError):
            DocumentOCRReadyEventData(
                **_ocr_ready_payload(ocr_engine="invalid_engine")
            )


class TestDocumentExtractedEventData:
    """Test DocumentExtractedEventData schema."""

    def test_valid_event(self) -> None:
        """Test creating a valid extraction event."""
        data = DocumentExtractedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            extraction_id="extr_123",
            strategy="hybrid",
            fields_extracted=15,
            confidence_avg=0.88,
            calibrated_confidence=0.91,
            model_name="gpt-4",
            processing_time_ms=3000,
            storage_path="extractions/extr_123.json",
        )

        assert data.strategy == "hybrid"
        assert data.model_name == "gpt-4"

    def test_valid_without_model(self) -> None:
        """Test extraction event without model (rules-based)."""
        data = DocumentExtractedEventData(
            doc_id="123",
            extraction_id="extr_456",
            strategy="rules",
            fields_extracted=10,
            confidence_avg=0.95,
            calibrated_confidence=0.93,
            model_name=None,  # No model for rules-based
            processing_time_ms=500,
            storage_path="path",
        )

        assert data.model_name is None
        assert data.strategy == "rules"


class TestKGEvents:
    """Test Knowledge Graph event schemas."""

    def test_kg_upsert_ready(self) -> None:
        """Test KG upsert ready event."""
        data = KGUpsertReadyEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            entity_count=25,
            relationship_count=40,
            tax_year="2024-25",
            taxpayer_id="TP-001",
            normalization_id="norm_123",
            storage_path="normalized/norm_123.json",
        )

        assert data.entity_count == 25
        assert data.tax_year == "2024-25"

    def test_kg_upserted(self) -> None:
        """Test KG upserted event."""
        data = KGUpsertedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            entities_created=10,
            entities_updated=5,
            relationships_created=20,
            relationships_updated=10,
            shacl_violations=0,
            processing_time_ms=2000,
            success=True,
            error_message=None,
        )

        assert data.success is True
        assert data.shacl_violations == 0

    def test_kg_upserted_with_violations(self) -> None:
        """Test KG upserted event with SHACL violations."""
        data = KGUpsertedEventData(
            doc_id="123",
            entities_created=5,
            entities_updated=0,
            relationships_created=8,
            relationships_updated=0,
            shacl_violations=3,
            processing_time_ms=1500,
            success=False,
            error_message="SHACL validation failed: Missing required property",
        )

        assert data.success is False
        assert data.shacl_violations == 3
        assert data.error_message is not None


class TestRAGIndexedEventData:
    """Test RAG indexed event schema."""

    def test_valid_event(self) -> None:
        """Test creating a valid RAG indexed event."""
        data = RAGIndexedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            collection_name="firm_knowledge",
            chunks_indexed=45,
            embedding_model="bge-small-en-v1.5",
            pii_detected=True,
            pii_redacted=True,
            processing_time_ms=5000,
            storage_path="chunks/doc_123.json",
        )

        assert data.pii_detected is True
        assert data.pii_redacted is True
        assert data.chunks_indexed == 45


class TestCalculationReadyEventData:
    """Test calculation ready event schema."""

    def test_valid_event(self) -> None:
        """Test creating a valid calculation event."""
        data = CalculationReadyEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            schedule_id="SA103",
            calculation_id="calc_789",
            boxes_computed=50,
            total_income=85000.50,
            total_tax=18500.25,
            confidence=0.92,
            evidence_count=15,
            processing_time_ms=2500,
            storage_path="calculations/calc_789.json",
        )

        assert data.schedule_id == "SA103"
        assert data.total_income == 85000.50
        assert data.total_tax == 18500.25

    def test_valid_without_totals(self) -> None:
        """Test calculation event without totals (partial calculation)."""
        data = CalculationReadyEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            schedule_id="SA102",
            calculation_id="calc_456",
            boxes_computed=20,
            total_income=None,
            total_tax=None,
            confidence=0.85,
            evidence_count=10,
            processing_time_ms=1000,
            storage_path="calculations/calc_456.json",
        )

        assert data.total_income is None
        assert data.total_tax is None


class TestFormFilledEventData:
    """Test form filled event schema."""

    def test_valid_event(self) -> None:
        """Test creating a valid form filled event."""
        data = FormFilledEventData(
            taxpayer_id="TP-001",
            tax_year="2024-25",
            form_id="SA100",
            fields_filled=75,
            pdf_size_bytes=524288,
            storage_path="forms/SA100_filled.pdf",
            evidence_bundle_path="evidence/bundle_123.zip",
            checksum_sha256="b" * 64,
        )

        assert data.form_id == "SA100"
        assert data.evidence_bundle_path is not None


class TestHMRCSubmittedEventData:
    """Test HMRC submitted event schema."""

    def test_successful_submission(self) -> None:
        """Test successful HMRC submission."""
        data = HMRCSubmittedEventData(**_hmrc_submitted_payload())

        assert data.success is True
        assert data.hmrc_reference is not None

    def test_failed_submission(self) -> None:
        """Test failed HMRC submission."""
        data = HMRCSubmittedEventData(
            **_hmrc_submitted_payload(
                submission_id="sub_888",
                hmrc_reference=None,
                submission_type="live",
                success=False,
                status_code=400,
                error_message="Invalid UTR number",
                processing_time_ms=1500,
            )
        )

        assert data.success is False
        assert data.error_message is not None

    def test_invalid_submission_type(self) -> None:
        """Test that a submission type outside the allowed values is rejected."""
        with pytest.raises(ValidationError):
            HMRCSubmittedEventData(
                **_hmrc_submitted_payload(submission_type="invalid")
            )


class TestReviewEvents:
    """Test review event schemas."""

    def test_review_requested(self) -> None:
        """Test review requested event."""
        data = ReviewRequestedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            review_type="extraction",
            priority="high",
            reason="Low confidence extraction (0.65)",
            assigned_to="reviewer@example.com",
            due_date="2024-12-01T10:00:00Z",
            metadata={"extraction_id": "extr_123"},
        )

        assert data.priority == "high"
        assert data.review_type == "extraction"

    def test_review_completed(self) -> None:
        """Test review completed event."""
        data = ReviewCompletedEventData(
            doc_id="01H8Y9Z5M3K7N2P4Q6R8T0V1W3",
            review_id="rev_456",
            reviewer="reviewer@example.com",
            decision="approved",
            changes_made=3,
            comments="Fixed vendor name and amount",
            review_duration_seconds=180,
        )

        assert data.decision == "approved"
        assert data.changes_made == 3


class TestFirmSyncCompletedEventData:
    """Test firm sync completed event schema."""

    def test_successful_sync(self) -> None:
        """Test successful firm sync."""
        data = FirmSyncCompletedEventData(
            firm_id="FIRM-001",
            connector_type="xero",
            sync_id="sync_123",
            records_synced=150,
            records_created=50,
            records_updated=100,
            records_failed=0,
            success=True,
            error_message=None,
            processing_time_ms=10000,
        )

        assert data.success is True
        assert data.records_failed == 0

    def test_partial_sync_failure(self) -> None:
        """Test sync with some failures."""
        data = FirmSyncCompletedEventData(
            firm_id="FIRM-002",
            connector_type="sage",
            sync_id="sync_456",
            records_synced=90,
            records_created=30,
            records_updated=60,
            records_failed=10,
            success=True,  # Overall success despite some failures
            error_message="10 records failed validation",
            processing_time_ms=15000,
        )

        assert data.records_failed == 10
        assert data.error_message is not None


class TestSchemaMapping:
    """Test schema mapping and validation utilities."""

    def test_all_topics_have_schemas(self) -> None:
        """Test that all topics in EventTopics have corresponding schemas."""
        # Every public attribute of EventTopics is treated as a topic value.
        # NOTE(review): a public method or non-topic constant added to
        # EventTopics would be reported here as a missing schema.
        topic_values = {
            getattr(EventTopics, attr)
            for attr in dir(EventTopics)
            if not attr.startswith("_")
        }
        schema_topics = set(EVENT_SCHEMA_MAP.keys())

        # All event topics should have schemas
        missing_schemas = topic_values - schema_topics
        assert not missing_schemas, f"Missing schemas for topics: {missing_schemas}"

    def test_validate_event_data(self) -> None:
        """Test validate_event_data function."""
        valid_data = _ingested_payload()

        result = validate_event_data("doc.ingested", valid_data)

        assert isinstance(result, DocumentIngestedEventData)
        assert result.doc_id == "01H8Y9Z5M3K7N2P4Q6R8T0V1W3"

    def test_validate_unknown_topic(self) -> None:
        """Test validation with unknown topic."""
        with pytest.raises(ValueError, match="Unknown event topic"):
            validate_event_data("unknown.topic", {})

    def test_validate_invalid_data(self) -> None:
        """Test validation with invalid data."""
        invalid_data = {
            "doc_id": "123",
            "filename": "test.pdf",
            # Missing required fields
        }

        with pytest.raises(ValidationError):
            validate_event_data("doc.ingested", invalid_data)

    def test_get_schema_for_topic(self) -> None:
        """Test get_schema_for_topic function."""
        schema = get_schema_for_topic("doc.ingested")

        # Classes are compared by identity.
        assert schema is DocumentIngestedEventData

    def test_get_schema_unknown_topic(self) -> None:
        """Test get_schema_for_topic with unknown topic."""
        with pytest.raises(ValueError, match="Unknown event topic"):
            get_schema_for_topic("unknown.topic")

    def test_schema_prevents_extra_fields(self) -> None:
        """Test that schemas prevent extra fields (extra='forbid')."""
        with pytest.raises(ValidationError) as exc_info:
            DocumentIngestedEventData(
                **_ingested_payload(unexpected_field="should_fail")  # Extra field
            )

        assert "Extra inputs are not permitted" in str(exc_info.value)