Initial commit
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled

This commit is contained in:
harkon
2025-10-11 08:41:36 +01:00
commit b324ff09ef
276 changed files with 55220 additions and 0 deletions

622
tests/unit/test_neo.py Normal file
View File

@@ -0,0 +1,622 @@
# tests/unit/test_neo.py
# Unit tests for libs/neo.py
from datetime import datetime
from unittest.mock import AsyncMock, Mock, patch
import pytest
from libs.neo import Neo4jClient, SHACLValidator, TemporalQueries
# pylint: disable=wrong-import-position,import-error,too-few-public-methods,global-statement
# pylint: disable=raise-missing-from,unused-argument,too-many-arguments,too-many-positional-arguments
# pylint: disable=too-many-locals,import-outside-toplevel
# mypy: disable-error-code=union-attr
# mypy: disable-error-code=no-untyped-def
class TestNeo4jClient:
    """Unit tests for Neo4jClient.

    Every test replaces the Neo4j driver with a ``Mock``; tests that reach the
    event loop also replace ``run_in_executor``, so no database connection is
    opened here.
    """

    def test_neo4j_client_init(self):
        """The constructor keeps a reference to the driver it receives."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        assert client.driver == mock_driver

    @pytest.mark.asyncio
    async def test_close(self):
        """close() submits ``driver.close`` to the loop's default executor."""
        mock_driver = Mock()
        mock_driver.close = Mock()
        client = Neo4jClient(mock_driver)
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock()
            await client.close()
            mock_loop.run_in_executor.assert_called_once_with(None, mock_driver.close)

    @pytest.mark.asyncio
    async def test_run_query_success(self):
        """run_query() returns whatever the executor call produced."""
        mock_driver = Mock()
        # NOTE(review): this session plumbing is only touched if the callable
        # handed to run_in_executor actually runs. The executor is mocked
        # below, so it is presumably never exercised -- confirm against
        # libs.neo before deleting it.
        mock_session = Mock()
        mock_result = Mock()
        mock_record = Mock()
        mock_record.data.return_value = {"name": "test", "value": 123}
        mock_result.__iter__ = Mock(return_value=iter([mock_record]))
        mock_session.run.return_value = mock_result
        mock_driver.session.return_value.__enter__ = Mock(return_value=mock_session)
        mock_driver.session.return_value.__exit__ = Mock(return_value=None)
        client = Neo4jClient(mock_driver)
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                return_value=[{"name": "test", "value": 123}]
            )
            result = await client.run_query("MATCH (n) RETURN n", {"param": "value"})
            assert result == [{"name": "test", "value": 123}]
            mock_loop.run_in_executor.assert_called_once()

    @pytest.mark.asyncio
    async def test_run_query_with_retries(self):
        """Two TransientErrors followed by a success yield the success value."""
        from neo4j.exceptions import TransientError

        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        with (
            patch("asyncio.get_event_loop") as mock_get_loop,
            patch("asyncio.sleep", new_callable=AsyncMock) as mock_sleep,
        ):
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            # First two calls fail, third succeeds.
            mock_loop.run_in_executor = AsyncMock(
                side_effect=[
                    TransientError("Connection lost"),
                    TransientError("Connection lost"),
                    [{"result": "success"}],
                ]
            )
            result = await client.run_query("MATCH (n) RETURN n", max_retries=3)
            assert result == [{"result": "success"}]
            assert mock_loop.run_in_executor.call_count == 3
            # asyncio.sleep is patched, so the two back-off waits cost no time.
            assert mock_sleep.call_count == 2

    @pytest.mark.asyncio
    async def test_run_query_max_retries_exceeded(self):
        """A persistent TransientError is re-raised after max_retries attempts."""
        from neo4j.exceptions import TransientError

        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        with (
            patch("asyncio.get_event_loop") as mock_get_loop,
            patch("asyncio.sleep", new_callable=AsyncMock),
        ):
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                side_effect=TransientError("Connection lost")
            )
            with pytest.raises(TransientError):
                await client.run_query("MATCH (n) RETURN n", max_retries=2)
            # Must sit outside the ``raises`` block: statements placed after
            # the raising call inside it would never execute.
            assert mock_loop.run_in_executor.call_count == 2

    @pytest.mark.asyncio
    async def test_run_query_non_retryable_error(self):
        """A non-transient error propagates after a single attempt."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                side_effect=ValueError("Invalid query")
            )
            with pytest.raises(ValueError):
                await client.run_query("INVALID QUERY")
            # Checked outside the ``raises`` block so it really runs: no retries.
            assert mock_loop.run_in_executor.call_count == 1

    @pytest.mark.asyncio
    async def test_run_transaction_success(self):
        """run_transaction() returns the value produced by the executor call."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)

        def mock_transaction_func(tx):
            return {"created": "node"}

        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(return_value={"created": "node"})
            result = await client.run_transaction(mock_transaction_func)
            assert result == {"created": "node"}
            mock_loop.run_in_executor.assert_called_once()

    @pytest.mark.asyncio
    async def test_create_node(self):
        """create_node() returns the stored node and sends an asserted_at stamp."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        properties = {"name": "Test Node", "value": 123}
        # AsyncMock is requested explicitly so the patch is awaitable without
        # depending on patch()'s coroutine auto-detection.
        with patch.object(
            client, "run_query", new_callable=AsyncMock
        ) as mock_run_query:
            mock_run_query.return_value = [
                {
                    "n": {
                        "name": "Test Node",
                        "value": 123,
                        "asserted_at": "2023-01-01T00:00:00",
                    }
                }
            ]
            result = await client.create_node("TestLabel", properties)
            assert result == {
                "name": "Test Node",
                "value": 123,
                "asserted_at": "2023-01-01T00:00:00",
            }
            mock_run_query.assert_called_once()
            # The parameters handed to run_query must carry asserted_at.
            call_args = mock_run_query.call_args
            assert "asserted_at" in call_args[0][1]["properties"]

    @pytest.mark.asyncio
    async def test_create_node_with_existing_asserted_at(self):
        """A caller-supplied asserted_at is forwarded unchanged."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        existing_time = datetime(2023, 1, 1, 12, 0, 0)
        properties = {"name": "Test Node", "asserted_at": existing_time}
        with patch.object(
            client, "run_query", new_callable=AsyncMock
        ) as mock_run_query:
            mock_run_query.return_value = [{"n": properties}]
            # Only the outgoing parameters matter here; the return value is
            # deliberately not bound.
            await client.create_node("TestLabel", properties)
            call_args = mock_run_query.call_args
            assert call_args[0][1]["properties"]["asserted_at"] == existing_time

    @pytest.mark.asyncio
    async def test_update_node(self):
        """update_node() returns the result of a single run_transaction call."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        properties = {"name": "Updated Node", "value": 456}
        with patch.object(
            client, "run_transaction", new_callable=AsyncMock
        ) as mock_run_transaction:
            mock_run_transaction.return_value = {"name": "Updated Node", "value": 456}
            result = await client.update_node("TestLabel", "node123", properties)
            assert result == {"name": "Updated Node", "value": 456}
            mock_run_transaction.assert_called_once()

    @pytest.mark.asyncio
    async def test_create_relationship(self):
        """create_relationship() forwards both ids and stamps asserted_at."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        rel_properties = {"strength": 0.8, "type": "RELATED_TO"}
        with patch.object(
            client, "run_query", new_callable=AsyncMock
        ) as mock_run_query:
            mock_run_query.return_value = [{"r": rel_properties}]
            result = await client.create_relationship(
                "Person", "person1", "Company", "company1", "WORKS_FOR", rel_properties
            )
            assert result == rel_properties
            mock_run_query.assert_called_once()
            # Inspect the parameter dict passed as the second positional arg.
            call_args = mock_run_query.call_args
            params = call_args[0][1]
            assert params["from_id"] == "person1"
            assert params["to_id"] == "company1"
            assert "asserted_at" in params["properties"]

    @pytest.mark.asyncio
    async def test_get_node_lineage(self):
        """get_node_lineage() passes node_id and max_depth to run_query."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        lineage_data = [
            {"path": "path1", "evidence": {"id": "evidence1"}},
            {"path": "path2", "evidence": {"id": "evidence2"}},
        ]
        with patch.object(
            client, "run_query", new_callable=AsyncMock
        ) as mock_run_query:
            mock_run_query.return_value = lineage_data
            result = await client.get_node_lineage("node123", max_depth=5)
            assert result == lineage_data
            mock_run_query.assert_called_once()
            # Inspect the parameter dict passed as the second positional arg.
            call_args = mock_run_query.call_args
            params = call_args[0][1]
            assert params["node_id"] == "node123"
            assert params["max_depth"] == 5

    @pytest.mark.asyncio
    async def test_export_to_rdf_success(self):
        """export_to_rdf() returns the first row of the export query."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        export_result = [{"triplesCount": 100, "format": "turtle"}]
        with patch.object(
            client, "run_query", new_callable=AsyncMock
        ) as mock_run_query:
            mock_run_query.return_value = export_result
            result = await client.export_to_rdf("turtle")
            assert result == {"triplesCount": 100, "format": "turtle"}
            mock_run_query.assert_called_once()

    @pytest.mark.asyncio
    async def test_export_to_rdf_fallback(self):
        """When the export query raises, the fallback exporter's output is used."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        with (
            patch.object(client, "run_query", new_callable=AsyncMock) as mock_run_query,
            patch.object(
                client, "_export_rdf_fallback", new_callable=AsyncMock
            ) as mock_fallback,
        ):
            mock_run_query.side_effect = Exception("n10s plugin not available")
            mock_fallback.return_value = "fallback_rdf_data"
            result = await client.export_to_rdf("turtle")
            assert result == {"rdf_data": "fallback_rdf_data", "format": "turtle"}
            mock_fallback.assert_called_once_with("neo4j")

    @pytest.mark.asyncio
    async def test_export_rdf_fallback(self):
        """The fallback exporter issues two queries and returns a string."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        nodes_data = [
            {"labels": ["Person"], "props": {"name": "John"}, "neo_id": 1},
            {"labels": ["Company"], "props": {"name": "Acme"}, "neo_id": 2},
        ]
        rels_data = [{"type": "WORKS_FOR", "props": {}, "from_id": 1, "to_id": 2}]
        with patch.object(
            client, "run_query", new_callable=AsyncMock
        ) as mock_run_query:
            # First call yields the nodes, second call the relationships.
            mock_run_query.side_effect = [nodes_data, rels_data]
            result = await client._export_rdf_fallback()
            assert isinstance(result, str)
            # The output should mention at least one of the node labels.
            assert "Person" in result or "Company" in result
            assert mock_run_query.call_count == 2
class TestSHACLValidator:
    """Unit tests for SHACLValidator.

    ``run_in_executor`` is mocked in every async test, so these tests check
    that ``validate_graph`` hands back the executor's result dict; pySHACL
    itself is not invoked.
    """

    def test_shacl_validator_init(self):
        """The constructor stores the shapes file path."""
        validator = SHACLValidator("/path/to/shapes.ttl")
        assert validator.shapes_file == "/path/to/shapes.ttl"

    @pytest.mark.asyncio
    async def test_validate_graph_success(self):
        """A conforming report from the executor is returned unchanged."""
        validator = SHACLValidator("/path/to/shapes.ttl")
        rdf_data = """
        @prefix ex: <http://example.org/> .
        ex:person1 a ex:Person ;
        ex:name "John Doe" ;
        ex:age 30 .
        """
        # A nested helper that patched pyshacl/rdflib used to live here but
        # was never called, so it has been dropped; the executor mock below is
        # what actually drives this test.
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                return_value={
                    "conforms": True,
                    "results_text": "Validation passed",
                    "violations_count": 0,
                }
            )
            result = await validator.validate_graph(rdf_data)
            assert result["conforms"] is True
            assert result["violations_count"] == 0
            assert "passed" in result["results_text"]

    @pytest.mark.asyncio
    async def test_validate_graph_with_violations(self):
        """A non-conforming report is surfaced with its violation count."""
        validator = SHACLValidator("/path/to/shapes.ttl")
        rdf_data = """
        @prefix ex: <http://example.org/> .
        ex:person1 a ex:Person ;
        ex:name "John Doe" .
        """
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                return_value={
                    "conforms": False,
                    "results_text": "Missing required property: age",
                    "violations_count": 1,
                }
            )
            result = await validator.validate_graph(rdf_data)
            assert result["conforms"] is False
            assert result["violations_count"] == 1
            assert "Missing" in result["results_text"]

    @pytest.mark.asyncio
    async def test_validate_graph_import_error(self):
        """The 'validation skipped' report shape is passed through."""
        validator = SHACLValidator("/path/to/shapes.ttl")
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                return_value={
                    "conforms": True,
                    "results_text": "SHACL validation skipped (pySHACL not installed)",
                    "violations_count": 0,
                }
            )
            result = await validator.validate_graph(
                "@prefix ex: <http://example.org/> ."
            )
            assert result["conforms"] is True
            assert result["violations_count"] == 0
            assert "skipped" in result["results_text"]

    @pytest.mark.asyncio
    async def test_validate_graph_validation_error(self):
        """The error report shape (violations_count of -1) is passed through."""
        validator = SHACLValidator("/path/to/shapes.ttl")
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                return_value={
                    "conforms": False,
                    "results_text": "Validation error: Invalid RDF syntax",
                    "violations_count": -1,
                }
            )
            result = await validator.validate_graph("invalid rdf data")
            assert result["conforms"] is False
            assert result["violations_count"] == -1
            assert "error" in result["results_text"]
class TestTemporalQueries:
    """Checks on the Cypher text produced by the TemporalQueries builders."""

    def test_get_current_state_query_no_filters(self):
        """Without filters the query matches the label and skips retracted nodes."""
        cypher = TemporalQueries.get_current_state_query("Person")
        expected_fragments = (
            "MATCH (n:Person)",
            "n.retracted_at IS NULL",
            "ORDER BY n.asserted_at DESC",
        )
        for fragment in expected_fragments:
            assert fragment in cypher

    def test_get_current_state_query_with_filters(self):
        """Each filter entry shows up as an equality predicate on ``n``."""
        criteria = {"name": "John Doe", "age": 30, "active": True}
        cypher = TemporalQueries.get_current_state_query("Person", criteria)
        expected_fragments = (
            "MATCH (n:Person)",
            "n.retracted_at IS NULL",
            "n.name = 'John Doe'",
            "n.age = 30",
            "n.active = True",
        )
        for fragment in expected_fragments:
            assert fragment in cypher

    def test_get_historical_state_query_no_filters(self):
        """The as-of timestamp bounds both asserted_at and retracted_at."""
        snapshot = datetime(2023, 6, 15, 12, 0, 0)
        cypher = TemporalQueries.get_historical_state_query("Person", snapshot)
        expected_fragments = (
            "MATCH (n:Person)",
            "n.asserted_at <= datetime('2023-06-15T12:00:00')",
            "n.retracted_at IS NULL OR n.retracted_at > datetime('2023-06-15T12:00:00')",
            "ORDER BY n.asserted_at DESC",
        )
        for fragment in expected_fragments:
            assert fragment in cypher

    def test_get_historical_state_query_with_filters(self):
        """Filters are combined with the as-of bound in the historical query."""
        snapshot = datetime(2023, 6, 15, 12, 0, 0)
        criteria = {"department": "Engineering", "level": 5}
        cypher = TemporalQueries.get_historical_state_query(
            "Employee", snapshot, criteria
        )
        expected_fragments = (
            "MATCH (n:Employee)",
            "n.asserted_at <= datetime('2023-06-15T12:00:00')",
            "n.department = 'Engineering'",
            "n.level = 5",
        )
        for fragment in expected_fragments:
            assert fragment in cypher

    def test_get_audit_trail_query(self):
        """The audit query selects the temporal and provenance columns in ASC order."""
        cypher = TemporalQueries.get_audit_trail_query("node123")
        expected_fragments = (
            "MATCH (n {id: 'node123'})",
            "n.asserted_at as asserted_at",
            "n.retracted_at as retracted_at",
            "n.source as source",
            "n.extractor_version as extractor_version",
            "properties(n) as properties",
            "ORDER BY n.asserted_at ASC",
        )
        for fragment in expected_fragments:
            assert fragment in cypher
class TestIntegration:
    """Scenario-style tests chaining several client and query-builder calls.

    All database-facing methods are mocked, so these verify call wiring and
    result plumbing rather than real Neo4j behaviour.
    """

    @pytest.mark.asyncio
    async def test_full_neo4j_workflow(self):
        """Create two nodes, link them, then fetch lineage -- all via mocks."""
        mock_driver = Mock()
        client = Neo4jClient(mock_driver)
        # AsyncMock is requested explicitly so each patched method is
        # awaitable without depending on patch()'s coroutine auto-detection.
        with (
            patch.object(client, "create_node", new_callable=AsyncMock) as mock_create,
            patch.object(
                client, "create_relationship", new_callable=AsyncMock
            ) as mock_create_rel,
            patch.object(
                client, "get_node_lineage", new_callable=AsyncMock
            ) as mock_lineage,
        ):
            mock_create.return_value = {"id": "person1", "name": "John Doe"}
            mock_create_rel.return_value = {"type": "WORKS_FOR", "strength": 0.8}
            mock_lineage.return_value = [{"path": "lineage_path"}]
            # Create nodes. The company's return value is not inspected (the
            # mock returns the same dict for both calls), so it is not bound.
            person = await client.create_node("Person", {"name": "John Doe"})
            await client.create_node("Company", {"name": "Acme Corp"})
            # Create relationship.
            relationship = await client.create_relationship(
                "Person",
                "person1",
                "Company",
                "company1",
                "WORKS_FOR",
                {"strength": 0.8},
            )
            # Get lineage.
            lineage = await client.get_node_lineage("person1")
            assert person["name"] == "John Doe"
            assert mock_create.call_count == 2
            assert relationship["type"] == "WORKS_FOR"
            assert len(lineage) == 1

    @pytest.mark.asyncio
    async def test_temporal_queries_integration(self):
        """The three TemporalQueries builders embed their inputs in the query text."""
        # The builders are called on the class itself, so no client or driver
        # is constructed here.
        current_query = TemporalQueries.get_current_state_query(
            "Person", {"active": True}
        )
        assert "Person" in current_query
        assert "active = True" in current_query
        # Historical state query.
        historical_time = datetime(2023, 1, 1, 0, 0, 0)
        historical_query = TemporalQueries.get_historical_state_query(
            "Person", historical_time
        )
        assert "2023-01-01T00:00:00" in historical_query
        # Audit trail query.
        audit_query = TemporalQueries.get_audit_trail_query("person123")
        assert "person123" in audit_query

    @pytest.mark.asyncio
    async def test_shacl_validation_integration(self):
        """validate_graph() returns the (mocked) executor report."""
        validator = SHACLValidator("/path/to/shapes.ttl")
        # The executor is mocked, so the validation itself does not run.
        with patch("asyncio.get_event_loop") as mock_get_loop:
            mock_loop = Mock()
            mock_get_loop.return_value = mock_loop
            mock_loop.run_in_executor = AsyncMock(
                return_value={
                    "conforms": True,
                    "results_text": "All constraints satisfied",
                    "violations_count": 0,
                }
            )
            rdf_data = "@prefix ex: <http://example.org/> . ex:person1 a ex:Person ."
            result = await validator.validate_graph(rdf_data)
            assert result["conforms"] is True
            assert result["violations_count"] == 0