Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
125 lines
4.6 KiB
Python
125 lines
4.6 KiB
Python
import asyncio
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
from libs.events import EventTopics, NATSEventBus
|
|
|
|
# Configuration
# Endpoints and tenant used by the end-to-end test below. They point at
# services running on the local machine (e.g. a local compose stack), so the
# test only passes when those services are up.
INGESTION_URL = "http://localhost:8000"  # base URL of the ingestion service
NATS_URL = "nats://localhost:4222"  # NATS server the event bus connects to
TENANT_ID = "tenant_e2e_test"  # tenant identifier sent with the upload
|
|
|
|
|
|
@pytest.mark.e2e
@pytest.mark.asyncio
async def test_backend_journey():
    """
    E2E test for the full backend journey: Ingest -> OCR -> Extract.

    After uploading a minimal PDF to the ingestion service, the test waits
    (up to 30 seconds per stage) for three events carrying the uploaded
    document's ``doc_id``: ``DOC_EXTRACTED``, ``KG_UPSERT_READY`` and
    ``KG_UPSERTED``. The final event must report ``success`` as ``True``.

    Requires the ingestion service and a NATS server to be reachable at
    ``INGESTION_URL`` and ``NATS_URL``.
    """
    # 1. Initialize NATS bus
    bus = NATSEventBus(
        servers=[NATS_URL],
        stream_name="TAX_AGENT_EVENTS",
        consumer_group="e2e-test-consumer",
    )

    # Queues to capture events
    extraction_queue = asyncio.Queue()
    kg_ready_queue = asyncio.Queue()
    kg_upserted_queue = asyncio.Queue()

    async def extraction_handler(topic, payload):
        # Only keep extraction events that belong to the test tenant.
        if payload.tenant_id == TENANT_ID:
            await extraction_queue.put(payload)

    async def kg_ready_handler(topic, payload):
        # No tenant filter here; events are matched on doc_id when consumed.
        await kg_ready_queue.put(payload)

    async def kg_upserted_handler(topic, payload):
        # No tenant filter here; events are matched on doc_id when consumed.
        await kg_upserted_queue.put(payload)

    await bus.start()

    # Everything after a successful start() is inside try/finally so the bus
    # is stopped even if a subscription, the upload, or an assertion fails.
    try:
        # Subscribe to events
        await bus.subscribe(EventTopics.DOC_EXTRACTED, extraction_handler)
        await bus.subscribe(EventTopics.KG_UPSERT_READY, kg_ready_handler)
        await bus.subscribe(EventTopics.KG_UPSERTED, kg_upserted_handler)

        # 2. Upload a document
        async with httpx.AsyncClient(
            verify=False
        ) as client:  # Disable SSL verification for local testing
            # Create a valid minimal PDF file
            pdf_content = (
                b"%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj "
                b"3 0 obj<</Type/Page/MediaBox[0 0 3 3]/Parent 2 0 R/Resources<<>>>>endobj\nxref\n0 4\n0000000000 65535 f\n"
                b"0000000010 00000 n\n0000000060 00000 n\n0000000111 00000 n\ntrailer<</Size 4/Root 1 0 R>>\nstartxref\n190\n%%EOF"
            )
            files = {"file": ("test.pdf", pdf_content, "application/pdf")}
            response = await client.post(
                f"{INGESTION_URL}/upload",
                files=files,
                data={"kind": "invoice", "source": "e2e_test"},
                headers={
                    "X-Tenant-ID": TENANT_ID,
                    "X-User-ID": "e2e_tester",
                    # Required by TrustedProxyMiddleware
                    "X-Authenticated-User": "e2e_tester",
                    "X-Authenticated-Email": "e2e@example.com",
                    "Authorization": "Bearer mock-token",
                },
            )
            assert response.status_code == 200, f"Upload failed: {response.text}"
            upload_data = response.json()
            doc_id = upload_data["doc_id"]
            print(f"Uploaded document: {doc_id}")

        # Helper to wait for matching event
        async def wait_for_event(queue, event_name, timeout=30.0):
            """Return the first payload on *queue* whose doc_id matches the upload.

            Payloads for other documents are logged and skipped. The test is
            failed via ``pytest.fail`` if no match arrives within *timeout*
            seconds in total (not per payload).
            """
            loop = asyncio.get_running_loop()
            deadline = loop.time() + timeout
            while True:
                remaining = deadline - loop.time()
                if remaining <= 0:
                    pytest.fail(f"Timed out waiting for {event_name}")

                try:
                    payload = await asyncio.wait_for(queue.get(), timeout=remaining)
                except asyncio.TimeoutError:
                    # asyncio.TimeoutError is only an alias of the builtin
                    # TimeoutError from Python 3.11; name it explicitly so
                    # older interpreters are handled too.
                    pytest.fail(f"Timed out waiting for {event_name}")

                data = payload.data
                if data.get("doc_id") == doc_id:
                    return payload
                print(
                    f"Ignoring {event_name} for different doc_id: {data.get('doc_id')}"
                )

        # 3. Wait for extraction event
        await wait_for_event(extraction_queue, "extraction event")
        print("Extraction completed successfully!")

        # 4. Wait for KG Ready event
        await wait_for_event(kg_ready_queue, "KG Ready event")
        print("Normalization completed successfully!")

        # 5. Wait for KG Upserted event
        payload = await wait_for_event(kg_upserted_queue, "KG Upserted event")
        data = payload.data
        assert data["success"] is True
        print("KG Upsert completed successfully!")
        print("E2E Journey completed successfully!")

    finally:
        await bus.stop()
|