completed local setup with compose
Some checks failed
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

This commit is contained in:
harkon
2025-11-26 13:17:17 +00:00
parent 8fe5e62fee
commit fdba81809f
87 changed files with 5610 additions and 3376 deletions

View File

@@ -0,0 +1,39 @@
import pytest
from libs.events import EventTopics
from libs.schemas.events import DocumentIngestedEventData, validate_event_data
@pytest.mark.integration
def test_doc_ingested_contract():
    """
    Contract test for the DOC_INGESTED event.

    Checks that the published event data agrees with the shared Pydantic
    schema, both via direct model construction and via the shared
    validation utility, and that a payload missing a required field is
    rejected.
    """
    # A payload that satisfies every required field of the schema.
    valid_data = {
        "doc_id": "doc_01H1V2W3X4Y5Z6",
        "filename": "test.pdf",
        "kind": "invoice",
        "source": "upload",
        "checksum_sha256": "a" * 64,
        "size_bytes": 1024,
        "mime_type": "application/pdf",
        "storage_path": "s3://bucket/key.pdf",
    }

    # 1. Direct construction of the Pydantic model must succeed.
    direct_model = DocumentIngestedEventData(**valid_data)
    assert direct_model.doc_id == valid_data["doc_id"]

    # 2. The shared validation utility must return the same model type.
    via_utility = validate_event_data(EventTopics.DOC_INGESTED, valid_data)
    assert isinstance(via_utility, DocumentIngestedEventData)
    assert via_utility.doc_id == valid_data["doc_id"]

    # 3. Dropping a required field must make validation fail.
    missing_doc_id = {k: v for k, v in valid_data.items() if k != "doc_id"}
    with pytest.raises(ValueError):
        validate_event_data(EventTopics.DOC_INGESTED, missing_doc_id)

View File

@@ -0,0 +1,98 @@
import asyncio
import pytest
from libs.events.base import EventPayload
from libs.events.nats_bus import NATSEventBus
from libs.schemas.events import DocumentIngestedEventData
@pytest.mark.asyncio
async def test_nats_bus_class():
    """End-to-end smoke test of NATSEventBus against a local NATS server.

    Creates a uniquely-named JetStream stream, publishes one event, asserts
    the subscribed handler receives it, then stops the bus and deletes the
    stream so repeated runs do not collide.
    """
    import time

    # FIX: `nats` was previously not imported anywhere in this module, so the
    # cleanup block below raised a NameError that was silently swallowed by
    # its `except Exception` — the test stream was never actually deleted.
    import nats

    unique_suffix = int(time.time())
    stream_name = f"PYTEST_DEBUG_STREAM_{unique_suffix}"
    print(f"\nStarting NATSEventBus with stream {stream_name}...")
    bus = NATSEventBus(
        servers="nats://localhost:4222",
        stream_name=stream_name,
        consumer_group="test-debug-group",
    )
    await bus.start()
    print("Bus started.")
    try:
        # Clean up (just in case a stream with this name already exists).
        try:
            await bus.js.delete_stream(stream_name)
        except Exception:
            pass
        await bus._ensure_stream_exists()
        # Wait for stream to be ready
        await asyncio.sleep(2)
        try:
            info = await bus.js.stream_info(stream_name)
            print(f"Stream info: {info.config.subjects}")
        except Exception as e:
            print(f"Failed to get stream info: {e}")

        # Setup subscriber: resolve a future the first time the handler fires.
        received_event = asyncio.Future()

        async def handler(topic, event):
            print(f"Handler received event: {event.event_id}")
            if not received_event.done():
                received_event.set_result(event)

        await bus.subscribe("doc.ingested", handler)
        print("Publishing message...")
        data = DocumentIngestedEventData(
            doc_id="test-doc-123",
            filename="test.pdf",
            mime_type="application/pdf",
            size_bytes=1024,
            source="upload",
            kind="invoice",
            storage_path="s3://test-bucket/test.pdf",
            checksum_sha256="a" * 64,
        )
        payload = EventPayload(
            data=data.model_dump(mode="json"),
            actor="tester",
            tenant_id="tenant-1",
            schema_version="1.0",
        )
        payload.event_id = "evt-debug-1"
        success = await bus.publish("doc.ingested", payload)
        print(f"Published: {success}")
        try:
            result = await asyncio.wait_for(received_event, timeout=5.0)
            print(f"Received event: {result.event_id}")
            assert result.event_id == "evt-debug-1"
            assert result.data["doc_id"] == "test-doc-123"
        # FIX: builtin TimeoutError only matches asyncio.wait_for's timeout
        # on Python >= 3.11; asyncio.TimeoutError works on all versions.
        except asyncio.TimeoutError:
            print("Timeout waiting for event")
            raise
    finally:
        # FIX: stop the bus and delete the stream even when the test fails;
        # previously a timeout left the bus running and the stream behind.
        await bus.stop()
        print("Bus stopped.")
        # Cleanup stream (best-effort).
        try:
            nc = await nats.connect("nats://localhost:4222")
            js = nc.jetstream()
            await js.delete_stream(stream_name)
            await nc.close()
        except Exception:
            pass

View File

@@ -0,0 +1,240 @@
import asyncio
import json
import pytest
import pytest_asyncio
from libs.events.base import EventPayload
from libs.events.nats_bus import NATSEventBus
from libs.schemas.events import DocumentIngestedEventData
async def is_nats_available() -> bool:
    """Return True if a NATS server is reachable at localhost:4222.

    FIX: the ``import nats`` now sits inside the try block, so a missing
    client library reports "not available" (False) instead of raising an
    ImportError out of a fixture that only wants to decide whether to skip.
    """
    try:
        import nats

        nc = await nats.connect("nats://localhost:4222")
        await nc.close()
        return True
    except Exception:
        return False
@pytest_asyncio.fixture
async def nats_bus():
    """Create and start a NATS event bus for integration tests.

    Skips the test when no NATS server is reachable. The test streams are
    wiped and recreated before the test runs, and deleted again on teardown.
    """
    if not await is_nats_available():
        pytest.skip("NATS server not available at localhost:4222")

    event_bus = NATSEventBus(
        servers="nats://localhost:4222",
        stream_name="TEST_INTEGRATION_STREAM",
        consumer_group="test-integration-group",
        dlq_stream_name="TEST_INTEGRATION_DLQ",
        max_retries=2,
    )
    await event_bus.start()

    # Drop any streams left over from a previous run (best-effort).
    try:
        await event_bus.js.delete_stream("TEST_INTEGRATION_STREAM")
        await event_bus.js.delete_stream("TEST_INTEGRATION_DLQ")
    except Exception:
        pass

    # Re-create the main and DLQ streams, then give JetStream a moment
    # to propagate them before handing the bus to the test.
    await event_bus._ensure_stream_exists()
    await event_bus.dlq.ensure_dlq_stream_exists()
    await asyncio.sleep(2)

    yield event_bus

    # Teardown: remove the test streams, then stop the bus.
    try:
        await event_bus.js.delete_stream("TEST_INTEGRATION_STREAM")
        await event_bus.js.delete_stream("TEST_INTEGRATION_DLQ")
    except Exception:
        pass
    await event_bus.stop()
@pytest.mark.integration
@pytest.mark.asyncio
async def test_publish_subscribe_flow():
    """Test end-to-end publish and subscribe flow.

    Instantiates the bus directly (rather than via the ``nats_bus`` fixture,
    to debug fixture issues) with its own stream/consumer-group pair and
    verifies a published event reaches the subscribed handler intact.
    """
    bus = NATSEventBus(
        servers="nats://localhost:4222",
        stream_name="TEST_INTEGRATION_STREAM_DIRECT",
        consumer_group="test-integration-group-direct",
        dlq_stream_name="TEST_INTEGRATION_DLQ_DIRECT",
        max_retries=2,
    )
    await bus.start()
    # Drop any stream left over from a previous run, then recreate it.
    try:
        await bus.js.delete_stream("TEST_INTEGRATION_STREAM_DIRECT")
    except Exception:
        pass
    await bus._ensure_stream_exists()
    try:
        # Create event data
        data = DocumentIngestedEventData(
            doc_id="test-doc-123",
            filename="test.pdf",
            mime_type="application/pdf",
            size_bytes=1024,
            source="upload",
            kind="invoice",
            storage_path="s3://test-bucket/test.pdf",
            checksum_sha256="a" * 64,
        )
        payload = EventPayload(
            data=data.model_dump(mode="json"),
            actor="test-user",
            tenant_id="test-tenant",
            trace_id="trace-123",
            schema_version="1.0",
        )
        payload.event_id = "evt-123"

        # Setup subscriber: resolve a future on first delivery.
        received_event = asyncio.Future()

        async def handler(topic, event):
            if not received_event.done():
                received_event.set_result(event)

        await bus.subscribe("doc.ingested", handler)
        # Publish event
        success = await bus.publish("doc.ingested", payload)
        assert success is True
        # Wait for reception
        try:
            result = await asyncio.wait_for(received_event, timeout=5.0)
            assert result.event_id == payload.event_id
            assert result.data["doc_id"] == "test-doc-123"
        # FIX: builtin TimeoutError only matches asyncio.wait_for's timeout
        # on Python >= 3.11; asyncio.TimeoutError works on all versions.
        except asyncio.TimeoutError:
            pytest.fail("Event not received within timeout")
    finally:
        # FIX: previously the direct stream was never deleted and leaked
        # between runs; clean it up best-effort before stopping the bus.
        try:
            await bus.js.delete_stream("TEST_INTEGRATION_STREAM_DIRECT")
        except Exception:
            pass
        await bus.stop()
@pytest.mark.integration
@pytest.mark.asyncio
async def test_dlq_routing(nats_bus):
    """Verify that an event whose handler keeps failing lands in the DLQ.

    Subscribes a handler that always raises, publishes one event, waits for
    the retries to be exhausted, then pulls the dead-lettered message and
    checks its envelope records the original payload and the error.
    """
    # Build a well-formed event whose handler will reject every delivery.
    event_data = DocumentIngestedEventData(
        doc_id="test-doc-fail",
        filename="fail.pdf",
        mime_type="application/pdf",
        size_bytes=1024,
        source="upload",
        kind="invoice",
        storage_path="s3://test-bucket/fail.pdf",
        checksum_sha256="a" * 64,
    )
    payload = EventPayload(
        data=event_data.model_dump(mode="json"),
        actor="test-user",
        tenant_id="test-tenant",
        trace_id="trace-fail",
        schema_version="1.0",
    )

    # Handler that fails on every attempt, counting deliveries.
    attempts = 0

    async def always_fail(topic, event):
        nonlocal attempts
        attempts += 1
        raise ValueError("Simulated processing failure")

    await nats_bus.subscribe("doc.fail", always_fail)
    await nats_bus.publish("doc.fail", payload)

    # Give the bus time to exhaust its retries and route to the DLQ.
    await asyncio.sleep(2.0)
    assert attempts >= 2

    # Pull the dead-lettered message and verify its envelope.
    dlq_subscription = await nats_bus.js.pull_subscribe(
        subject="TEST_INTEGRATION_DLQ.doc.fail", durable="test-dlq-consumer"
    )
    fetched = await dlq_subscription.fetch(batch=1, timeout=5.0)
    assert len(fetched) == 1
    dead_letter = fetched[0]
    envelope = json.loads(dead_letter.data.decode())
    assert envelope["original_payload"]["event_id"] == payload.event_id
    assert envelope["error"]["type"] == "ValueError"
    assert envelope["error"]["message"] == "Simulated processing failure"
    await dead_letter.ack()
@pytest.mark.integration
@pytest.mark.asyncio
async def test_metrics_recording(nats_bus):
    """Verify publish/consume counters advance during event processing.

    Snapshots the Prometheus counters, pushes one event through the bus,
    waits for the handler to consume it, and asserts both counters grew.
    """
    from libs.events.metrics import event_consumed_total, event_published_total

    # Snapshot counter values before publishing. NOTE: reaches into the
    # private prometheus-client value holder (`_value`); acceptable in a test.
    published_before = event_published_total.labels(topic="doc.metrics")._value.get()
    consumed_before = event_consumed_total.labels(
        topic="doc.metrics", consumer_group="test-integration-group"
    )._value.get()

    # Publish one well-formed event.
    event_data = DocumentIngestedEventData(
        doc_id="test-doc-metrics",
        filename="metrics.pdf",
        mime_type="application/pdf",
        size_bytes=1024,
        source="upload",
        kind="invoice",
        storage_path="s3://test-bucket/metrics.pdf",
        checksum_sha256="a" * 64,
    )
    payload = EventPayload(
        data=event_data.model_dump(mode="json"),
        actor="test-user",
        tenant_id="test-tenant",
        trace_id="trace-metrics",
        schema_version="1.0",
    )

    consumed = asyncio.Future()

    async def handler(topic, event):
        if not consumed.done():
            consumed.set_result(event)

    await nats_bus.subscribe("doc.metrics", handler)
    await nats_bus.publish("doc.metrics", payload)
    await asyncio.wait_for(consumed, timeout=5.0)

    # Both counters must have advanced past their snapshots.
    published_after = event_published_total.labels(topic="doc.metrics")._value.get()
    consumed_after = event_consumed_total.labels(
        topic="doc.metrics", consumer_group="test-integration-group"
    )._value.get()
    assert published_after > published_before
    assert consumed_after > consumed_before