import asyncio

import httpx
import pytest

from libs.events import EventTopics, NATSEventBus
from libs.schemas.events import DocumentExtractedEventData

# Configuration
INGESTION_URL = "http://localhost:8000"
NATS_URL = "nats://localhost:4222"
TENANT_ID = "tenant_e2e_test"


@pytest.mark.e2e
@pytest.mark.asyncio
async def test_backend_journey():
    """
    E2E test for the full backend journey: Ingest -> OCR -> Extract.

    Uploads a small PDF payload to the ingestion service at INGESTION_URL,
    then waits up to 30 seconds for a DOC_EXTRACTED event carrying
    TENANT_ID on the NATS bus at NATS_URL. The event data must reference
    the uploaded document and load into DocumentExtractedEventData.

    Both services must be reachable at the configured URLs; the test fails
    on a non-200 upload response or if no matching event arrives in time.
    """
    # 1. Initialize NATS bus
    bus = NATSEventBus(
        servers=[NATS_URL],
        stream_name="TAX_AGENT_EVENTS",
        consumer_group="e2e-test-consumer",
    )
    await bus.start()

    # Future to capture the final event
    extraction_future = asyncio.get_running_loop().create_future()

    async def extraction_handler(topic, payload):
        # Only the first matching event resolves the future: calling
        # set_result() on a finished future raises InvalidStateError, which
        # would otherwise blow up inside the handler on a repeated event.
        if payload.tenant_id == TENANT_ID and not extraction_future.done():
            extraction_future.set_result(payload)

    # Subscribe to the final event in the chain *before* uploading so the
    # event cannot be missed.
    await bus.subscribe(EventTopics.DOC_EXTRACTED, extraction_handler)

    try:
        # 2. Upload a document
        # Disable SSL verification for local testing
        async with httpx.AsyncClient(verify=False) as client:
            # Minimal PDF payload used as the upload body.
            # NOTE(review): the object bodies in this literal are just `<>`;
            # confirm the ingestion/OCR chain accepts it as a valid PDF.
            pdf_content = (
                b"%PDF-1.0\n1 0 obj<>endobj 2 0 obj<>endobj "
                b"3 0 obj<>>>endobj\nxref\n0 4\n0000000000 65535 f\n"
                b"0000000010 00000 n\n0000000060 00000 n\n0000000111 00000 n\ntrailer<>\nstartxref\n190\n%%EOF"
            )

            files = {"file": ("test.pdf", pdf_content, "application/pdf")}
            response = await client.post(
                f"{INGESTION_URL}/upload",
                files=files,
                data={"kind": "invoice", "source": "e2e_test"},
                headers={
                    "X-Tenant-ID": TENANT_ID,
                    "X-User-ID": "e2e_tester",
                    # Required by TrustedProxyMiddleware
                    "X-Authenticated-User": "e2e_tester",
                    "X-Authenticated-Email": "e2e@example.com",
                    "Authorization": "Bearer mock-token",
                },
            )
            assert response.status_code == 200, f"Upload failed: {response.text}"
            upload_data = response.json()
            doc_id = upload_data["doc_id"]
            print(f"Uploaded document: {doc_id}")

        # 3. Wait for extraction event (with timeout)
        try:
            # Give it enough time for the whole chain to process
            payload = await asyncio.wait_for(extraction_future, timeout=30.0)
        except asyncio.TimeoutError:
            # asyncio.TimeoutError is only an alias of the builtin
            # TimeoutError from Python 3.11 on; naming it explicitly keeps
            # this handler effective on older interpreters too.
            pytest.fail("Timed out waiting for extraction event")

        # 4. Verify payload
        data = payload.data
        assert data["doc_id"] == doc_id
        assert data["tenant_id"] == TENANT_ID
        assert "extraction_results" in data

        # Validate against schema
        event_data = DocumentExtractedEventData(**data)
        assert event_data.doc_id == doc_id

        print("E2E Journey completed successfully!")
    finally:
        # Always release the bus connection, even when an assertion fails.
        await bus.stop()