import asyncio

import httpx
import pytest

from libs.events import EventTopics, NATSEventBus
from libs.schemas.events import DocumentExtractedEventData

# Configuration
INGESTION_URL = "http://localhost:8000"
NATS_URL = "nats://localhost:4222"
TENANT_ID = "tenant_e2e_test"

# Upper bound, in seconds, on how long the test waits for the final event.
EXTRACTION_TIMEOUT_SECONDS = 30.0


@pytest.mark.e2e
@pytest.mark.asyncio
async def test_backend_journey() -> None:
    """
    E2E test for the full backend journey: Ingest -> OCR -> Extract.

    Starts a NATS event bus, subscribes to ``EventTopics.DOC_EXTRACTED``,
    uploads a dummy PDF to ``{INGESTION_URL}/upload`` and waits up to
    ``EXTRACTION_TIMEOUT_SECONDS`` for an event whose ``tenant_id`` equals
    ``TENANT_ID``. The event's ``data`` is then checked against the uploaded
    ``doc_id`` and validated by constructing ``DocumentExtractedEventData``.

    The test fails if the upload does not return HTTP 200, if no matching
    event arrives before the timeout, or if the payload checks fail. The bus
    is stopped in all cases.
    """
    # 1. Initialize NATS bus
    bus = NATSEventBus(
        servers=[NATS_URL],
        stream_name="TAX_AGENT_EVENTS",
        consumer_group="e2e-test-consumer",
    )
    await bus.start()

    # Future to capture the final event; bound to the loop running this test.
    extraction_future = asyncio.get_running_loop().create_future()

    async def extraction_handler(topic, payload):
        """Resolve the future with the first event belonging to TENANT_ID."""
        # Guard with done(): a second matching event (e.g. a redelivery) would
        # otherwise raise InvalidStateError from set_result().
        if payload.tenant_id == TENANT_ID and not extraction_future.done():
            extraction_future.set_result(payload)

    # Subscribe to the final event in the chain
    await bus.subscribe(EventTopics.DOC_EXTRACTED, extraction_handler)

    try:
        # 2. Upload a document
        async with httpx.AsyncClient() as client:
            # Create a dummy PDF file
            files = {"file": ("test.pdf", b"%PDF-1.4 mock content", "application/pdf")}
            response = await client.post(
                f"{INGESTION_URL}/upload",
                files=files,
                data={"kind": "invoice", "source": "e2e_test"},
                headers={"X-Tenant-ID": TENANT_ID, "X-User-ID": "e2e_tester"},
            )
            assert response.status_code == 200, f"Upload failed: {response.text}"
            upload_data = response.json()
            doc_id = upload_data["doc_id"]
            print(f"Uploaded document: {doc_id}")

        # 3. Wait for extraction event (with timeout)
        try:
            # Give it enough time for the whole chain to process
            payload = await asyncio.wait_for(
                extraction_future, timeout=EXTRACTION_TIMEOUT_SECONDS
            )
        except asyncio.TimeoutError:
            # asyncio.TimeoutError is only an alias of the builtin TimeoutError
            # on Python 3.11+; catching the asyncio name works on every version.
            pytest.fail("Timed out waiting for extraction event")

        # 4. Verify payload
        data = payload.data
        assert data["doc_id"] == doc_id
        assert data["tenant_id"] == TENANT_ID
        assert "extraction_results" in data

        # Validate against schema
        event_data = DocumentExtractedEventData(**data)
        assert event_data.doc_id == doc_id
        print("E2E Journey completed successfully!")
    finally:
        await bus.stop()