import asyncio

import httpx
import pytest

from libs.events import EventTopics, NATSEventBus

# Configuration
INGESTION_URL = "http://localhost:8000"
NATS_URL = "nats://localhost:4222"
TENANT_ID = "tenant_e2e_test"


async def _wait_for_event(queue, event_name, doc_id, timeout=30.0):
    """Return the first payload from *queue* whose ``data["doc_id"]`` matches.

    Payloads for other documents are logged and discarded. The timeout is an
    overall budget for the whole wait, not a per-item budget.

    Args:
        queue: ``asyncio.Queue`` fed by an event-bus handler.
        event_name: Human-readable name used in log and error messages.
        doc_id: Document id the caller is waiting for.
        timeout: Total number of seconds to wait before giving up.

    Raises:
        TimeoutError: If no matching payload arrives within *timeout* seconds.
    """
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout

    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError(f"Timed out waiting for {event_name}")

        try:
            payload = await asyncio.wait_for(queue.get(), timeout=remaining)
        except asyncio.TimeoutError:
            # Before Python 3.11 asyncio.TimeoutError is NOT the builtin
            # TimeoutError, so normalise to the builtin that callers catch.
            raise TimeoutError(f"Timed out waiting for {event_name}") from None

        data = payload.data
        if data.get("doc_id") == doc_id:
            return payload
        print(f"Ignoring {event_name} for different doc_id: {data.get('doc_id')}")


@pytest.mark.e2e
@pytest.mark.asyncio
async def test_backend_journey():
    """
    E2E test for the full backend journey: Ingest -> OCR -> Extract.

    Uploads a small PDF to the ingestion service at ``INGESTION_URL`` and then
    waits, in order, for the DOC_EXTRACTED, KG_UPSERT_READY and KG_UPSERTED
    events for that document on the NATS bus at ``NATS_URL``. The test fails
    if the upload is rejected, if any event does not arrive in time, or if
    the final upsert event does not report success.
    """
    # 1. Initialize NATS bus
    bus = NATSEventBus(
        servers=[NATS_URL],
        stream_name="TAX_AGENT_EVENTS",
        consumer_group="e2e-test-consumer",
    )
    await bus.start()

    # Queues to capture events
    extraction_queue = asyncio.Queue()
    kg_ready_queue = asyncio.Queue()
    kg_upserted_queue = asyncio.Queue()

    async def extraction_handler(topic, payload):
        # Only extraction events for the test tenant are of interest.
        if payload.tenant_id == TENANT_ID:
            await extraction_queue.put(payload)

    async def kg_ready_handler(topic, payload):
        await kg_ready_queue.put(payload)

    async def kg_upserted_handler(topic, payload):
        await kg_upserted_queue.put(payload)

    try:
        # Subscribe to events. This happens inside the try block so the bus
        # is stopped even when a subscription fails, and before the upload so
        # no event for the new document can be missed.
        await bus.subscribe(EventTopics.DOC_EXTRACTED, extraction_handler)
        await bus.subscribe(EventTopics.KG_UPSERT_READY, kg_ready_handler)
        await bus.subscribe(EventTopics.KG_UPSERTED, kg_upserted_handler)

        # 2. Upload a document
        # Disable SSL verification for local testing
        async with httpx.AsyncClient(verify=False) as client:
            # Create a valid minimal PDF file
            # NOTE(review): the object bodies below read as `<>` rather than
            # `<<...>>` dictionaries - confirm this is the intended payload.
            pdf_content = (
                b"%PDF-1.0\n1 0 obj<>endobj 2 0 obj<>endobj "
                b"3 0 obj<>>>endobj\nxref\n0 4\n0000000000 65535 f\n"
                b"0000000010 00000 n\n0000000060 00000 n\n0000000111 00000 n\ntrailer<>\nstartxref\n190\n%%EOF"
            )
            files = {"file": ("test.pdf", pdf_content, "application/pdf")}
            response = await client.post(
                f"{INGESTION_URL}/upload",
                files=files,
                data={"kind": "invoice", "source": "e2e_test"},
                headers={
                    "X-Tenant-ID": TENANT_ID,
                    "X-User-ID": "e2e_tester",
                    # Required by TrustedProxyMiddleware
                    "X-Authenticated-User": "e2e_tester",
                    "X-Authenticated-Email": "e2e@example.com",
                    "Authorization": "Bearer mock-token",
                },
            )

        assert response.status_code == 200, f"Upload failed: {response.text}"
        upload_data = response.json()
        doc_id = upload_data["doc_id"]
        print(f"Uploaded document: {doc_id}")

        # 3. Wait for extraction event
        try:
            await _wait_for_event(extraction_queue, "extraction event", doc_id)
        except TimeoutError:
            pytest.fail("Timed out waiting for extraction event")
        print("Extraction completed successfully!")

        # 4. Wait for KG Ready event
        try:
            await _wait_for_event(kg_ready_queue, "KG Ready event", doc_id)
        except TimeoutError:
            pytest.fail("Timed out waiting for KG Ready event")
        print("Normalization completed successfully!")

        # 5. Wait for KG Upserted event
        try:
            payload = await _wait_for_event(
                kg_upserted_queue, "KG Upserted event", doc_id
            )
        except TimeoutError:
            pytest.fail("Timed out waiting for KG Upserted event")

        # pytest.fail() raises, so payload is always bound at this point.
        assert payload.data["success"] is True
        print("KG Upsert completed successfully!")
        print("E2E Journey completed successfully!")
    finally:
        await bus.stop()