e2e backend test
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
This commit is contained in:
@@ -64,20 +64,20 @@ async def init_dependencies(app_settings: KGSettings) -> None:
|
|||||||
shapes_graph = None
|
shapes_graph = None
|
||||||
|
|
||||||
|
|
||||||
|
async def startup_event() -> None:
|
||||||
|
"""Initialize service dependencies"""
|
||||||
|
await init_dependencies(cast(KGSettings, _settings))
|
||||||
|
|
||||||
|
|
||||||
app, _settings = create_app(
|
app, _settings = create_app(
|
||||||
service_name="svc-kg",
|
service_name="svc-kg",
|
||||||
title="Tax Agent Knowledge Graph Service",
|
title="Tax Agent Knowledge Graph Service",
|
||||||
description="Service for managing and validating the Knowledge Graph",
|
description="Service for managing and validating the Knowledge Graph",
|
||||||
settings_class=KGSettings,
|
settings_class=KGSettings,
|
||||||
|
startup_hooks=[startup_event],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Initialize dependencies immediately
|
|
||||||
@app.on_event("startup")
|
|
||||||
async def startup_event():
|
|
||||||
await init_dependencies(cast(KGSettings, _settings))
|
|
||||||
|
|
||||||
|
|
||||||
tracer = get_tracer("svc-kg")
|
tracer = get_tracer("svc-kg")
|
||||||
metrics = get_metrics()
|
metrics = get_metrics()
|
||||||
|
|
||||||
@@ -100,7 +100,7 @@ async def _handle_kg_upsert_ready(topic: str, payload: EventPayload) -> None:
|
|||||||
data = payload.data
|
data = payload.data
|
||||||
nodes = data.get("nodes", [])
|
nodes = data.get("nodes", [])
|
||||||
relationships = data.get("relationships", [])
|
relationships = data.get("relationships", [])
|
||||||
document_id = data.get("document_id")
|
doc_id = data.get("doc_id")
|
||||||
tenant_id = data.get("tenant_id")
|
tenant_id = data.get("tenant_id")
|
||||||
|
|
||||||
if not nodes and not relationships:
|
if not nodes and not relationships:
|
||||||
@@ -108,7 +108,7 @@ async def _handle_kg_upsert_ready(topic: str, payload: EventPayload) -> None:
|
|||||||
return
|
return
|
||||||
|
|
||||||
with tracer.start_as_current_span("upsert_kg_data") as span:
|
with tracer.start_as_current_span("upsert_kg_data") as span:
|
||||||
span.set_attribute("document_id", document_id)
|
span.set_attribute("doc_id", doc_id)
|
||||||
span.set_attribute("tenant_id", tenant_id)
|
span.set_attribute("tenant_id", tenant_id)
|
||||||
span.set_attribute("node_count", len(nodes))
|
span.set_attribute("node_count", len(nodes))
|
||||||
span.set_attribute("relationship_count", len(relationships))
|
span.set_attribute("relationship_count", len(relationships))
|
||||||
@@ -121,12 +121,12 @@ async def _handle_kg_upsert_ready(topic: str, payload: EventPayload) -> None:
|
|||||||
if not conforms:
|
if not conforms:
|
||||||
logger.error(
|
logger.error(
|
||||||
"SHACL validation failed",
|
"SHACL validation failed",
|
||||||
document_id=document_id,
|
doc_id=doc_id,
|
||||||
validation_report=validation_report,
|
validation_report=validation_report,
|
||||||
)
|
)
|
||||||
metrics.counter("kg_validation_errors_total").labels(
|
metrics.counter(
|
||||||
tenant_id=tenant_id
|
"kg_validation_errors_total", labelnames=["tenant_id"]
|
||||||
).inc()
|
).labels(tenant_id=tenant_id).inc()
|
||||||
return
|
return
|
||||||
|
|
||||||
# 2. Write data to Neo4j
|
# 2. Write data to Neo4j
|
||||||
@@ -144,31 +144,30 @@ async def _handle_kg_upsert_ready(topic: str, payload: EventPayload) -> None:
|
|||||||
# 3. Publish kg.upserted event
|
# 3. Publish kg.upserted event
|
||||||
event_payload = EventPayload(
|
event_payload = EventPayload(
|
||||||
data={
|
data={
|
||||||
"document_id": document_id,
|
"doc_id": doc_id,
|
||||||
"tenant_id": tenant_id,
|
"tenant_id": tenant_id,
|
||||||
"taxpayer_id": data.get("taxpayer_id"),
|
"taxpayer_id": data.get("taxpayer_id"),
|
||||||
"tax_year": data.get("tax_year"),
|
"tax_year": data.get("tax_year"),
|
||||||
"node_count": len(nodes),
|
"node_count": len(nodes),
|
||||||
"relationship_count": len(relationships),
|
"relationship_count": len(relationships),
|
||||||
|
"success": True,
|
||||||
},
|
},
|
||||||
actor=payload.actor,
|
actor=payload.actor,
|
||||||
tenant_id=tenant_id,
|
tenant_id=str(tenant_id),
|
||||||
trace_id=str(span.get_span_context().trace_id),
|
trace_id=str(span.get_span_context().trace_id),
|
||||||
)
|
)
|
||||||
await event_bus.publish(EventTopics.KG_UPSERTED, event_payload) # type: ignore
|
await event_bus.publish(EventTopics.KG_UPSERTED, event_payload) # type: ignore
|
||||||
|
|
||||||
metrics.counter("kg_upserts_total").labels(tenant_id=tenant_id).inc()
|
metrics.counter("kg_upserts_total", labelnames=["tenant_id"]).labels(
|
||||||
logger.info(
|
tenant_id=tenant_id
|
||||||
"KG upsert completed", document_id=document_id, tenant_id=tenant_id
|
).inc()
|
||||||
)
|
logger.info("KG upsert completed", doc_id=doc_id, tenant_id=tenant_id)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error("Failed to upsert KG data", doc_id=doc_id, error=str(e))
|
||||||
"Failed to upsert KG data", document_id=document_id, error=str(e)
|
metrics.counter(
|
||||||
)
|
"kg_upsert_errors_total", labelnames=["tenant_id", "error_type"]
|
||||||
metrics.counter("kg_upsert_errors_total").labels(
|
).labels(tenant_id=tenant_id, error_type=type(e).__name__).inc()
|
||||||
tenant_id=tenant_id, error_type=type(e).__name__
|
|
||||||
).inc()
|
|
||||||
|
|
||||||
|
|
||||||
async def _validate_with_shacl(
|
async def _validate_with_shacl(
|
||||||
|
|||||||
@@ -67,20 +67,20 @@ async def init_dependencies(app_settings: NormalizeMapSettings) -> None:
|
|||||||
logger.info("NormalizeMap service started successfully")
|
logger.info("NormalizeMap service started successfully")
|
||||||
|
|
||||||
|
|
||||||
|
async def startup_event() -> None:
|
||||||
|
"""Initialize service dependencies"""
|
||||||
|
await init_dependencies(cast(NormalizeMapSettings, _settings))
|
||||||
|
|
||||||
|
|
||||||
app, _settings = create_app(
|
app, _settings = create_app(
|
||||||
service_name="svc-normalize-map",
|
service_name="svc-normalize-map",
|
||||||
title="Tax Agent Normalize and Map Service",
|
title="Tax Agent Normalize and Map Service",
|
||||||
description="Normalize extracted data and map to Knowledge Graph",
|
description="Normalize extracted data and map to Knowledge Graph",
|
||||||
settings_class=NormalizeMapSettings,
|
settings_class=NormalizeMapSettings,
|
||||||
|
startup_hooks=[startup_event],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Initialize dependencies immediately
|
|
||||||
@app.on_event("startup")
|
|
||||||
async def startup_event(): # type: ignore
|
|
||||||
await init_dependencies(cast(NormalizeMapSettings, _settings))
|
|
||||||
|
|
||||||
|
|
||||||
tracer = get_tracer("svc-normalize-map")
|
tracer = get_tracer("svc-normalize-map")
|
||||||
metrics = get_metrics()
|
metrics = get_metrics()
|
||||||
|
|
||||||
@@ -314,7 +314,7 @@ async def _map_to_kg_ontology(
|
|||||||
return {
|
return {
|
||||||
"nodes": nodes,
|
"nodes": nodes,
|
||||||
"relationships": relationships,
|
"relationships": relationships,
|
||||||
"document_id": doc_id,
|
"doc_id": doc_id,
|
||||||
"tenant_id": tenant_id,
|
"tenant_id": tenant_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
python-ulid
|
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import httpx
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from libs.events import EventTopics, NATSEventBus
|
from libs.events import EventTopics, NATSEventBus
|
||||||
from libs.schemas.events import DocumentExtractedEventData
|
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
INGESTION_URL = "http://localhost:8000"
|
INGESTION_URL = "http://localhost:8000"
|
||||||
@@ -26,22 +25,31 @@ async def test_backend_journey():
|
|||||||
)
|
)
|
||||||
await bus.start()
|
await bus.start()
|
||||||
|
|
||||||
# Future to capture the final event
|
# Queues to capture events
|
||||||
extraction_future = asyncio.Future()
|
extraction_queue = asyncio.Queue()
|
||||||
|
kg_ready_queue = asyncio.Queue()
|
||||||
|
kg_upserted_queue = asyncio.Queue()
|
||||||
|
|
||||||
async def extraction_handler(topic, payload):
|
async def extraction_handler(topic, payload):
|
||||||
if payload.tenant_id == TENANT_ID:
|
if payload.tenant_id == TENANT_ID:
|
||||||
extraction_future.set_result(payload)
|
await extraction_queue.put(payload)
|
||||||
|
|
||||||
# Subscribe to the final event in the chain
|
async def kg_ready_handler(topic, payload):
|
||||||
|
await kg_ready_queue.put(payload)
|
||||||
|
|
||||||
|
async def kg_upserted_handler(topic, payload):
|
||||||
|
await kg_upserted_queue.put(payload)
|
||||||
|
|
||||||
|
# Subscribe to events
|
||||||
await bus.subscribe(EventTopics.DOC_EXTRACTED, extraction_handler)
|
await bus.subscribe(EventTopics.DOC_EXTRACTED, extraction_handler)
|
||||||
|
await bus.subscribe(EventTopics.KG_UPSERT_READY, kg_ready_handler)
|
||||||
|
await bus.subscribe(EventTopics.KG_UPSERTED, kg_upserted_handler)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 2. Upload a document
|
# 2. Upload a document
|
||||||
async with httpx.AsyncClient(
|
async with httpx.AsyncClient(
|
||||||
verify=False
|
verify=False
|
||||||
) as client: # Disable SSL verification for local testing
|
) as client: # Disable SSL verification for local testing
|
||||||
# Create a dummy PDF file
|
|
||||||
# Create a valid minimal PDF file
|
# Create a valid minimal PDF file
|
||||||
pdf_content = (
|
pdf_content = (
|
||||||
b"%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj "
|
b"%PDF-1.0\n1 0 obj<</Type/Catalog/Pages 2 0 R>>endobj 2 0 obj<</Type/Pages/Kids[3 0 R]/Count 1>>endobj "
|
||||||
@@ -67,25 +75,50 @@ async def test_backend_journey():
|
|||||||
doc_id = upload_data["doc_id"]
|
doc_id = upload_data["doc_id"]
|
||||||
print(f"Uploaded document: {doc_id}")
|
print(f"Uploaded document: {doc_id}")
|
||||||
|
|
||||||
# 3. Wait for extraction event (with timeout)
|
# Helper to wait for matching event
|
||||||
|
async def wait_for_event(queue, event_name):
|
||||||
|
start_time = asyncio.get_event_loop().time()
|
||||||
|
timeout = 30.0
|
||||||
|
while True:
|
||||||
|
remaining = timeout - (asyncio.get_event_loop().time() - start_time)
|
||||||
|
if remaining <= 0:
|
||||||
|
raise TimeoutError(f"Timed out waiting for {event_name}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
payload = await asyncio.wait_for(queue.get(), timeout=remaining)
|
||||||
|
data = payload.data
|
||||||
|
if data.get("doc_id") == doc_id:
|
||||||
|
return payload
|
||||||
|
print(
|
||||||
|
f"Ignoring {event_name} for different doc_id: {data.get('doc_id')}"
|
||||||
|
)
|
||||||
|
except TimeoutError:
|
||||||
|
raise TimeoutError(f"Timed out waiting for {event_name}")
|
||||||
|
|
||||||
|
# 3. Wait for extraction event
|
||||||
try:
|
try:
|
||||||
# Give it enough time for the whole chain to process
|
payload = await wait_for_event(extraction_queue, "extraction event")
|
||||||
payload = await asyncio.wait_for(extraction_future, timeout=30.0)
|
print("Extraction completed successfully!")
|
||||||
|
|
||||||
# 4. Verify payload
|
|
||||||
data = payload.data
|
|
||||||
assert data["doc_id"] == doc_id
|
|
||||||
assert data["tenant_id"] == TENANT_ID
|
|
||||||
assert "extraction_results" in data
|
|
||||||
|
|
||||||
# Validate against schema
|
|
||||||
event_data = DocumentExtractedEventData(**data)
|
|
||||||
assert event_data.doc_id == doc_id
|
|
||||||
|
|
||||||
print("E2E Journey completed successfully!")
|
|
||||||
|
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
pytest.fail("Timed out waiting for extraction event")
|
pytest.fail("Timed out waiting for extraction event")
|
||||||
|
|
||||||
|
# 4. Wait for KG Ready event
|
||||||
|
try:
|
||||||
|
payload = await wait_for_event(kg_ready_queue, "KG Ready event")
|
||||||
|
print("Normalization completed successfully!")
|
||||||
|
except TimeoutError:
|
||||||
|
pytest.fail("Timed out waiting for KG Ready event")
|
||||||
|
|
||||||
|
# 5. Wait for KG Upserted event
|
||||||
|
try:
|
||||||
|
payload = await wait_for_event(kg_upserted_queue, "KG Upserted event")
|
||||||
|
data = payload.data
|
||||||
|
assert data["success"] is True
|
||||||
|
print("KG Upsert completed successfully!")
|
||||||
|
print("E2E Journey completed successfully!")
|
||||||
|
|
||||||
|
except TimeoutError:
|
||||||
|
pytest.fail("Timed out waiting for KG Upserted event")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
await bus.stop()
|
await bus.stop()
|
||||||
|
|||||||
Reference in New Issue
Block a user