e2e backend test
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

This commit is contained in:
harkon
2025-12-01 13:58:38 +02:00
parent db61b05c80
commit a99754b86c
4 changed files with 85 additions and 54 deletions

View File

@@ -4,7 +4,6 @@ import httpx
import pytest
from libs.events import EventTopics, NATSEventBus
from libs.schemas.events import DocumentExtractedEventData
# Configuration
INGESTION_URL = "http://localhost:8000"
@@ -26,22 +25,31 @@ async def test_backend_journey():
)
await bus.start()
# Future to capture the final event
extraction_future = asyncio.Future()
# Queues to capture events
extraction_queue = asyncio.Queue()
kg_ready_queue = asyncio.Queue()
kg_upserted_queue = asyncio.Queue()
async def extraction_handler(topic, payload):
if payload.tenant_id == TENANT_ID:
extraction_future.set_result(payload)
await extraction_queue.put(payload)
# Subscribe to the final event in the chain
# Bus callback subscribed to EventTopics.KG_UPSERT_READY: buffers each payload
# on kg_ready_queue. `topic` is unused and no filtering happens here;
# wait_for_event later discards payloads whose doc_id does not match.
async def kg_ready_handler(topic, payload):
await kg_ready_queue.put(payload)
# Bus callback subscribed to EventTopics.KG_UPSERTED: buffers each payload on
# kg_upserted_queue. `topic` is unused and no filtering happens here;
# wait_for_event later discards payloads whose doc_id does not match.
async def kg_upserted_handler(topic, payload):
await kg_upserted_queue.put(payload)
# Subscribe to events
await bus.subscribe(EventTopics.DOC_EXTRACTED, extraction_handler)
await bus.subscribe(EventTopics.KG_UPSERT_READY, kg_ready_handler)
await bus.subscribe(EventTopics.KG_UPSERTED, kg_upserted_handler)
try:
# 2. Upload a document
async with httpx.AsyncClient(
verify=False
) as client: # Disable SSL verification for local testing
# Create a dummy PDF file
# Create a valid minimal PDF file
pdf_content = (
b"%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj "
@@ -67,25 +75,50 @@ async def test_backend_journey():
doc_id = upload_data["doc_id"]
print(f"Uploaded document: {doc_id}")
# 3. Wait for extraction event (with timeout)
# Helper to wait for matching event
async def wait_for_event(queue, event_name, timeout=30.0):
    """Return the first payload from *queue* whose ``doc_id`` matches the upload.

    Payloads belonging to other documents are reported with ``print`` and
    discarded. ``doc_id`` is read from the enclosing scope (the id returned
    by the upload step).

    Args:
        queue: ``asyncio.Queue`` that an event handler fills with payloads.
            Each payload must expose a dict-like ``data`` attribute.
        event_name: Human-readable label used in log and error messages.
        timeout: Overall budget in seconds, shared across every payload
            received (not a per-payload timeout). Defaults to 30 seconds.

    Returns:
        The matching payload object.

    Raises:
        TimeoutError: If no matching payload arrives within *timeout*.
    """
    # get_running_loop() is the supported way to reach the loop from inside a
    # coroutine; its monotonic clock gives us a fixed deadline.
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        remaining = deadline - loop.time()
        if remaining <= 0:
            raise TimeoutError(f"Timed out waiting for {event_name}")
        try:
            payload = await asyncio.wait_for(queue.get(), timeout=remaining)
        except (asyncio.TimeoutError, TimeoutError):
            # Before Python 3.11 asyncio.TimeoutError is NOT the builtin
            # TimeoutError, so catch both and always re-raise the builtin,
            # which is what the callers' `except TimeoutError` expects.
            raise TimeoutError(f"Timed out waiting for {event_name}") from None
        seen_doc_id = payload.data.get("doc_id")
        if seen_doc_id == doc_id:
            return payload
        print(f"Ignoring {event_name} for different doc_id: {seen_doc_id}")
# 3. Wait for extraction event
try:
# Give it enough time for the whole chain to process
payload = await asyncio.wait_for(extraction_future, timeout=30.0)
# 4. Verify payload
data = payload.data
assert data["doc_id"] == doc_id
assert data["tenant_id"] == TENANT_ID
assert "extraction_results" in data
# Validate against schema
event_data = DocumentExtractedEventData(**data)
assert event_data.doc_id == doc_id
print("E2E Journey completed successfully!")
payload = await wait_for_event(extraction_queue, "extraction event")
print("Extraction completed successfully!")
except TimeoutError:
pytest.fail("Timed out waiting for extraction event")
# 4. Wait for KG Ready event
try:
payload = await wait_for_event(kg_ready_queue, "KG Ready event")
print("Normalization completed successfully!")
except TimeoutError:
pytest.fail("Timed out waiting for KG Ready event")
# 5. Wait for KG Upserted event
try:
payload = await wait_for_event(kg_upserted_queue, "KG Upserted event")
data = payload.data
assert data["success"] is True
print("KG Upsert completed successfully!")
print("E2E Journey completed successfully!")
except TimeoutError:
pytest.fail("Timed out waiting for KG Upserted event")
finally:
await bus.stop()