Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
573 lines
18 KiB
Python
573 lines
18 KiB
Python
# FILE: apps/svc-kg/main.py
|
|
|
|
# Knowledge graph facade with CRUD, queries, lineage, and SHACL validation
|
|
|
|
import json
import os
import re

# Import shared libraries
import sys
from datetime import datetime
from typing import Any

import structlog
from fastapi import Depends, HTTPException, Query, Request
from fastapi.responses import JSONResponse

sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))

from libs.app_factory import create_app
from libs.config import BaseAppSettings, create_event_bus, create_neo4j_client
from libs.events import EventBus
from libs.neo import Neo4jClient, SHACLValidator, TemporalQueries
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.schemas import ErrorResponse
from libs.security import get_current_user, get_tenant_id
|
|
|
|
# Module-level structlog logger used by every handler and helper in this file.
logger = structlog.get_logger()
|
|
|
|
|
|
class KGSettings(BaseAppSettings):
    """Configuration for the knowledge-graph service.

    Extends ``BaseAppSettings`` with SHACL validation options and query
    limits. Only the fields declared here are documented; inherited fields
    are defined by the base class.
    """

    # Name reported by the /health endpoint.
    service_name: str = "svc-kg"

    # SHACL validation
    # Path to the shapes file; the validator is only created at startup when
    # this path exists on disk.
    shapes_file: str = "schemas/shapes.ttl"
    # When True (and a validator was loaded) create/update handlers run
    # node-level SHACL validation before writing.
    validate_on_write: bool = True

    # Query limits
    # Upper bound accepted for the ``limit`` query parameter of GET /nodes/{label}.
    max_results: int = 1000
    # NOTE(review): not referenced anywhere in this module — presumably a
    # traversal depth cap consumed elsewhere; confirm.
    max_depth: int = 10
    # NOTE(review): not referenced anywhere in this module — presumably
    # seconds; confirm where (and whether) it is enforced.
    query_timeout: int = 30
|
|
|
|
|
|
# Create app and settings
# create_app returns both the application object (used below for route and
# event decorators) and the populated KGSettings instance.
app, settings = create_app(
    service_name="svc-kg",
    title="Tax Agent Knowledge Graph Service",
    description="Knowledge graph facade with CRUD and queries",
    settings_class=KGSettings,
)
|
|
|
|
# Global clients
# The three clients start as None and are assigned in startup_event();
# shacl_validator stays None when the shapes file does not exist.
neo4j_client: Neo4jClient | None = None
shacl_validator: SHACLValidator | None = None
event_bus: EventBus | None = None
# Tracer and metrics handles are created at import time and shared by all handlers.
tracer = get_tracer("svc-kg")
metrics = get_metrics()
|
|
|
|
|
|
@app.on_event("startup")
async def startup_event() -> None:
    """Initialize service dependencies.

    Sets up observability, then populates the module-level ``neo4j_client``,
    ``shacl_validator`` (only when the shapes file exists) and ``event_bus``
    globals, and starts the event bus.
    """
    global neo4j_client, shacl_validator, event_bus

    logger.info("Starting KG service")

    # Setup observability
    setup_observability(settings)

    # Initialize Neo4j client
    neo4j_driver = create_neo4j_client(settings)
    neo4j_client = Neo4jClient(neo4j_driver)

    # Initialize SHACL validator
    if os.path.exists(settings.shapes_file):
        shacl_validator = SHACLValidator(settings.shapes_file)
    elif settings.validate_on_write:
        # Without a validator the write handlers skip validation entirely;
        # make that visible instead of failing open silently.
        logger.warning(
            "SHACL shapes file not found; write validation is disabled",
            shapes_file=settings.shapes_file,
        )

    # Initialize event bus
    event_bus = create_event_bus(settings)
    await event_bus.start()

    logger.info("KG service started successfully")
|
|
|
|
|
|
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Cleanup service dependencies.

    Closes the Neo4j client and stops the event bus when they were created.
    The event bus is stopped even if closing the Neo4j client raises; the
    exception still propagates afterwards.
    """
    logger.info("Shutting down KG service")

    try:
        if neo4j_client:
            await neo4j_client.close()
    finally:
        # Run regardless of the Neo4j outcome so the bus is not left running.
        if event_bus:
            await event_bus.stop()

    logger.info("KG service shutdown complete")
|
|
|
|
|
|
@app.get("/health")
async def health_check() -> dict[str, Any]:
    """Return a static "healthy" payload with service identity and a UTC timestamp.

    No dependency (Neo4j, event bus) is probed; the response only reflects
    that the process is serving requests.
    """
    payload: dict[str, Any] = {"status": "healthy"}
    payload["service"] = settings.service_name
    payload["version"] = settings.service_version
    # Naive UTC timestamp in ISO-8601 form (no timezone suffix).
    payload["timestamp"] = datetime.utcnow().isoformat()
    return payload
|
|
|
|
|
|
@app.post("/nodes/{label}")
async def create_node(
    label: str,
    properties: dict[str, Any],
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Create a new node.

    Args:
        label: Node label taken from the URL path.
        properties: Node properties from the request body. ``tenant_id`` and
            ``created_by`` are overwritten with server-side values.
        current_user: Authenticated user claims; ``sub`` is recorded as the creator.
        tenant_id: Tenant the node is stamped with.

    Returns:
        A dict with ``status``, ``label``, the stored ``properties`` and the
        raw ``neo4j_result``.

    Raises:
        HTTPException: 422 when SHACL validation is enabled and the node does
            not conform; 500 for any other failure.
    """
    with tracer.start_as_current_span("create_node") as span:
        span.set_attribute("label", label)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Add tenant isolation: client-supplied values are replaced.
            properties["tenant_id"] = tenant_id
            properties["created_by"] = current_user.get("sub", "system")

            # Validate with SHACL if enabled. The result must be honoured,
            # otherwise validate_on_write only logs and never blocks a write.
            if settings.validate_on_write and shacl_validator:
                if not await _validate_node(label, properties):
                    raise HTTPException(
                        status_code=422, detail="Node failed SHACL validation"
                    )

            # Create node
            result = await neo4j_client.create_node(label, properties)

            # Update metrics
            metrics.counter("nodes_created_total").labels(
                tenant_id=tenant_id, label=label
            ).inc()

            logger.info("Node created", label=label, node_id=result.get("id"))

            return {
                "status": "created",
                "label": label,
                "properties": properties,
                "neo4j_result": result,
            }

        except HTTPException:
            # Keep deliberate HTTP errors (e.g. 422) instead of masking them as 500.
            raise
        except Exception as e:
            logger.error("Failed to create node", label=label, error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Failed to create node: {str(e)}"
            ) from e
|
|
|
|
|
|
@app.get("/nodes/{label}")
async def get_nodes(
    label: str,
    limit: int = Query(default=100, ge=0, le=settings.max_results),
    filters: str | None = Query(default=None),
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Get nodes by label with optional filters.

    Args:
        label: Node label taken from the URL path.
        limit: Maximum number of rows; bounded to ``0..settings.max_results``.
        filters: Optional JSON object (as a string) of property filters.
        current_user: Resolved only to enforce authentication.
        tenant_id: Always written into the filters, replacing any
            client-supplied ``tenant_id``.

    Returns:
        A dict with ``label``, ``count`` and the list of ``nodes``.

    Raises:
        HTTPException: 400 when ``filters`` is not valid JSON or not a JSON
            object; 500 for any other failure.
    """
    with tracer.start_as_current_span("get_nodes") as span:
        span.set_attribute("label", label)
        span.set_attribute("tenant_id", tenant_id)
        span.set_attribute("limit", limit)

        try:
            # Parse filters
            filter_dict: dict[str, Any] = {}
            if filters:
                try:
                    parsed = json.loads(filters)
                except json.JSONDecodeError as e:
                    raise HTTPException(
                        status_code=400, detail="Invalid filters JSON"
                    ) from e
                # Valid JSON that is not an object (list, number, ...) cannot
                # carry property filters and would break the tenant assignment.
                if not isinstance(parsed, dict):
                    raise HTTPException(
                        status_code=400, detail="Filters must be a JSON object"
                    )
                filter_dict = parsed

            # Add tenant isolation
            filter_dict["tenant_id"] = tenant_id

            # Build query; ``limit`` is a validated non-negative int, so it is
            # safe to append to the query text.
            query = TemporalQueries.get_current_state_query(label, filter_dict)
            query += f" LIMIT {limit}"

            # Execute query
            results = await neo4j_client.run_query(query)

            # Update metrics
            metrics.counter("nodes_queried_total").labels(
                tenant_id=tenant_id, label=label
            ).inc()

            return {
                "label": label,
                "count": len(results),
                "nodes": [result["n"] for result in results],
            }

        except HTTPException:
            raise
        except Exception as e:
            logger.error("Failed to get nodes", label=label, error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Failed to get nodes: {str(e)}"
            ) from e
|
|
|
|
|
|
@app.get("/nodes/{label}/{node_id}")
async def get_node(
    label: str,
    node_id: str,
    include_lineage: bool = Query(default=False),
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Get specific node with optional lineage.

    Args:
        label: Node label from the URL path; must be a plain identifier
            (letters, digits, underscore, not starting with a digit).
        node_id: Value matched against the node's ``id`` property.
        include_lineage: When True, lineage is fetched and returned.
        current_user: Resolved only to enforce authentication.
        tenant_id: Restricts the match to the caller's tenant.

    Returns:
        ``{"node": ..., "lineage": ...}`` where ``lineage`` is ``None`` unless
        ``include_lineage`` is set.

    Raises:
        HTTPException: 400 for an invalid label, 404 when no non-retracted
            node matches, 500 for any other failure.
    """
    with tracer.start_as_current_span("get_node") as span:
        span.set_attribute("label", label)
        span.set_attribute("node_id", node_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Labels cannot be passed as Cypher parameters, so the label is
            # interpolated into the query text below; restrict it to a plain
            # identifier to rule out Cypher injection through the URL path.
            if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", label):
                raise HTTPException(status_code=400, detail="Invalid node label")

            # Get node
            query = f"""
            MATCH (n:{label} {{id: $node_id, tenant_id: $tenant_id}})
            WHERE n.retracted_at IS NULL
            RETURN n
            """

            results = await neo4j_client.run_query(
                query, {"node_id": node_id, "tenant_id": tenant_id}
            )

            if not results:
                raise HTTPException(status_code=404, detail="Node not found")

            node_data = results[0]["n"]

            # Get lineage if requested
            # NOTE(review): the lineage lookup receives only node_id, not
            # tenant_id — confirm the client scopes it to the tenant.
            lineage: list[dict[str, Any]] | None = None
            if include_lineage:
                lineage = await neo4j_client.get_node_lineage(node_id)

            return {"node": node_data, "lineage": lineage}

        except HTTPException:
            raise
        except Exception as e:
            logger.error(
                "Failed to get node", label=label, node_id=node_id, error=str(e)
            )
            raise HTTPException(
                status_code=500, detail=f"Failed to get node: {str(e)}"
            ) from e
|
|
|
|
|
|
@app.put("/nodes/{label}/{node_id}")
async def update_node(
    label: str,
    node_id: str,
    properties: dict[str, Any],
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Update node with bitemporal versioning.

    Args:
        label: Node label taken from the URL path.
        node_id: Identifier of the node to update.
        properties: New properties from the request body. ``tenant_id`` and
            ``updated_by`` are overwritten with server-side values.
        current_user: Authenticated user claims; ``sub`` is recorded as the updater.
        tenant_id: Tenant the update is stamped with.

    Returns:
        A dict with ``status``, ``label``, ``node_id`` and the ``properties`` sent
        to the client.

    Raises:
        HTTPException: 422 when SHACL validation is enabled and the node does
            not conform; 500 for any other failure.
    """
    with tracer.start_as_current_span("update_node") as span:
        span.set_attribute("label", label)
        span.set_attribute("node_id", node_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Add metadata: client-supplied values are replaced.
            properties["tenant_id"] = tenant_id
            properties["updated_by"] = current_user.get("sub", "system")

            # Validate with SHACL if enabled. The result must be honoured,
            # otherwise validate_on_write only logs and never blocks a write.
            if settings.validate_on_write and shacl_validator:
                if not await _validate_node(label, properties):
                    raise HTTPException(
                        status_code=422, detail="Node failed SHACL validation"
                    )

            # Update node (creates new version)
            await neo4j_client.update_node(label, node_id, properties)

            # Update metrics
            metrics.counter("nodes_updated_total").labels(
                tenant_id=tenant_id, label=label
            ).inc()

            logger.info("Node updated", label=label, node_id=node_id)

            return {
                "status": "updated",
                "label": label,
                "node_id": node_id,
                "properties": properties,
            }

        except HTTPException:
            # Keep deliberate HTTP errors (e.g. 422) instead of masking them as 500.
            raise
        except Exception as e:
            logger.error(
                "Failed to update node", label=label, node_id=node_id, error=str(e)
            )
            raise HTTPException(
                status_code=500, detail=f"Failed to update node: {str(e)}"
            ) from e
|
|
|
|
|
|
@app.post("/relationships")
async def create_relationship(
    from_label: str,
    from_id: str,
    to_label: str,
    to_id: str,
    relationship_type: str,
    properties: dict[str, Any] | None = None,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Link two existing nodes with a typed relationship.

    The relationship properties (an empty dict when none are supplied) are
    stamped with the caller's ``tenant_id`` and ``created_by`` before being
    handed to the Neo4j client. Any failure is logged and reported as a 500.
    """
    with tracer.start_as_current_span("create_relationship") as span:
        span_attributes = (
            ("from_label", from_label),
            ("to_label", to_label),
            ("relationship_type", relationship_type),
            ("tenant_id", tenant_id),
        )
        for attr_name, attr_value in span_attributes:
            span.set_attribute(attr_name, attr_value)

        try:
            # Server-controlled metadata overrides anything the client sent.
            rel_properties = properties or {}
            rel_properties.update(
                tenant_id=tenant_id,
                created_by=current_user.get("sub", "system"),
            )

            await neo4j_client.create_relationship(
                from_label, from_id, to_label, to_id, relationship_type, rel_properties
            )

            created_counter = metrics.counter("relationships_created_total")
            created_counter.labels(
                tenant_id=tenant_id, relationship_type=relationship_type
            ).inc()

            logger.info(
                "Relationship created",
                from_id=from_id,
                to_id=to_id,
                type=relationship_type,
            )

            response: dict[str, Any] = {
                "status": "created",
                "from_id": from_id,
                "to_id": to_id,
                "relationship_type": relationship_type,
                "properties": rel_properties,
            }
            return response

        except Exception as e:
            logger.error("Failed to create relationship", error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Failed to create relationship: {str(e)}"
            )
|
|
|
|
|
|
@app.post("/query")
async def execute_query(
    query: str,
    parameters: dict[str, Any] | None = None,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Execute a custom Cypher query.

    The caller's ``tenant_id`` is injected as the ``$tenant_id`` query
    parameter (overriding any client-supplied value).
    NOTE(review): the query text itself is not rewritten, so tenant isolation
    depends on the query actually using ``$tenant_id`` — confirm this is
    acceptable for the callers of this endpoint.

    Args:
        query: Cypher text; rejected when ``_is_safe_query`` flags it.
        parameters: Optional query parameters.
        current_user: Resolved only to enforce authentication.
        tenant_id: Injected into the query parameters.

    Returns:
        A dict echoing ``query`` and ``parameters`` plus ``results`` and ``count``.

    Raises:
        HTTPException: 400 when the query is considered unsafe; 500 when
            execution fails.
    """
    with tracer.start_as_current_span("execute_query") as span:
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Add tenant isolation to parameters
            query_params = parameters or {}
            query_params["tenant_id"] = tenant_id

            # Validate query (basic security check)
            if not _is_safe_query(query):
                raise HTTPException(status_code=400, detail="Unsafe query detected")

            # Execute query without retries.
            # NOTE(review): settings.query_timeout is not applied here.
            results = await neo4j_client.run_query(query, query_params, max_retries=1)

            # Update metrics
            metrics.counter("custom_queries_total").labels(tenant_id=tenant_id).inc()

            return {
                "query": query,
                "parameters": query_params,
                "results": results,
                "count": len(results),
            }

        except HTTPException:
            # Without this the 400 above was caught below and reported as a 500.
            raise
        except Exception as e:
            logger.error("Query execution failed", query=query[:100], error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Query failed: {str(e)}"
            ) from e
|
|
|
|
|
|
@app.get("/export/rdf")
async def export_rdf(
    format: str = Query(default="turtle"),
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Serialize the knowledge graph to RDF in the requested format.

    Returns the requested ``format``, the exported ``rdf_data`` and an
    ``exported_at`` UTC timestamp. Failures are logged and surfaced as a 500
    chained to the original exception.

    NOTE(review): ``tenant_id`` is used for tracing and metrics only; it is
    not passed to the export call — confirm the export is tenant-scoped.
    """
    with tracer.start_as_current_span("export_rdf") as span:
        for attr_name, attr_value in (("format", format), ("tenant_id", tenant_id)):
            span.set_attribute(attr_name, attr_value)

        try:
            exported = await neo4j_client.export_to_rdf(format)

            export_counter = metrics.counter("rdf_exports_total")
            export_counter.labels(tenant_id=tenant_id, format=format).inc()

            response: dict[str, Any] = {"format": format, "rdf_data": exported}
            response["exported_at"] = datetime.utcnow().isoformat()
            return response

        except Exception as e:
            logger.error("RDF export failed", format=format, error=str(e))
            raise HTTPException(
                status_code=500, detail=f"RDF export failed: {str(e)}"
            ) from e
|
|
|
|
|
|
@app.post("/validate")
async def validate_graph(
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Validate knowledge graph with SHACL.

    Exports the graph as Turtle, runs the SHACL validator over it and returns
    ``conforms``, ``violations_count``, ``results_text`` and a ``validated_at``
    UTC timestamp.

    Raises:
        HTTPException: 501 when no SHACL validator is configured; 500 when the
            export yields no data or validation fails unexpectedly.
    """
    with tracer.start_as_current_span("validate_graph") as span:
        span.set_attribute("tenant_id", tenant_id)

        try:
            if not shacl_validator:
                raise HTTPException(
                    status_code=501, detail="SHACL validation not configured"
                )

            # Export current graph state
            rdf_export = await neo4j_client.export_to_rdf("turtle")

            # Extract RDF data from export result
            # NOTE(review): this treats the export result as a mapping with an
            # "rdf_data" key, while export_rdf returns the result as-is —
            # confirm the actual return shape of export_to_rdf.
            rdf_data = rdf_export.get("rdf_data", "")
            if not rdf_data:
                raise HTTPException(
                    status_code=500, detail="Failed to export RDF data for validation"
                )

            # Run SHACL validation
            validation_result = await shacl_validator.validate_graph(rdf_data)

            # Update metrics
            metrics.counter("validations_total").labels(
                tenant_id=tenant_id, conforms=validation_result["conforms"]
            ).inc()

            return {
                "conforms": validation_result["conforms"],
                "violations_count": validation_result["violations_count"],
                "results_text": validation_result["results_text"],
                "validated_at": datetime.utcnow().isoformat(),
            }

        except HTTPException:
            # Preserve the 501 / explicit 500 raised above instead of
            # re-wrapping them as a generic "Validation failed" 500.
            raise
        except Exception as e:
            logger.error("Graph validation failed", error=str(e))
            raise HTTPException(
                status_code=500, detail=f"Validation failed: {str(e)}"
            ) from e
|
|
|
|
|
|
async def _validate_node(label: str, properties: dict[str, Any]) -> bool:
|
|
"""Validate node with SHACL"""
|
|
if not shacl_validator:
|
|
return True
|
|
|
|
try:
|
|
# Create a minimal RDF representation of the node for validation
|
|
rdf_lines = ["@prefix tax: <https://tax-kg.example.com/> ."]
|
|
node_uri = "tax:temp_node"
|
|
|
|
# Add type declaration
|
|
rdf_lines.append(f"{node_uri} a tax:{label} .")
|
|
|
|
# Add properties
|
|
for prop, value in properties.items():
|
|
if isinstance(value, str):
|
|
rdf_lines.append(f'{node_uri} tax:{prop} "{value}" .')
|
|
else:
|
|
rdf_lines.append(f"{node_uri} tax:{prop} {value} .")
|
|
|
|
rdf_data = "\n".join(rdf_lines)
|
|
|
|
# Validate the node RDF data
|
|
validation_result = await shacl_validator.validate_graph(rdf_data)
|
|
|
|
if not validation_result["conforms"]:
|
|
logger.warning(
|
|
"Node SHACL validation failed",
|
|
label=label,
|
|
violations=validation_result["violations_count"],
|
|
details=validation_result["results_text"],
|
|
)
|
|
return False
|
|
|
|
logger.debug("Node SHACL validation passed", label=label)
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error("Node SHACL validation error", label=label, error=str(e))
|
|
# Return True to not block operations on validation errors
|
|
return True
|
|
|
|
|
|
# Denylisted Cypher operations, matched as whole tokens on the lower-cased
# query. The lookbehind rejects a preceding letter/underscore and the
# lookahead a following letter/digit/underscore, so identifiers such as
# ``deleted_at`` or ``recall`` do not trigger, while any whitespace run
# (spaces, tabs, newlines) between the words of a multi-word keyword does.
_DANGEROUS_QUERY_PATTERN = re.compile(
    r"(?<![a-z_])"
    r"(?:"
    r"delete|remove|drop|call|foreach"
    r"|load\s+csv"
    r"|create\s+constraint"
    r"|create\s+(?:(?:range|text|point|lookup|fulltext|vector|btree)\s+)?index"
    r")"
    r"(?![a-z0-9_])"
)


def _is_safe_query(query: str) -> bool:
    """Basic query safety check.

    Returns False when the query contains a denylisted operation (delete,
    remove, drop, call, foreach, load csv, create index/constraint) as a
    standalone token, True otherwise.

    This is a coarse denylist, not a parser: keywords inside string literals
    are still rejected, and operations not listed here are not blocked.
    """
    return _DANGEROUS_QUERY_PATTERN.search(query.lower()) is None
|
|
|
|
|
|
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Convert an HTTPException into an RFC7807-style JSON error response.

    The exception's ``detail`` is used for both ``title`` and ``detail``, the
    request URL becomes ``instance``, and ``trace_id`` is always empty.
    """
    status = exc.status_code
    problem = ErrorResponse(
        type=f"https://httpstatuses.com/{status}",
        title=exc.detail,
        status=status,
        detail=exc.detail,
        instance=str(request.url),
        trace_id="",
    )
    return JSONResponse(status_code=status, content=problem.model_dump())
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: serve this module's ``app`` with uvicorn on port 8005.
    # uvicorn is imported here so it is only required when run as a script.
    import uvicorn

    # host="0.0.0.0" binds all interfaces and reload=True enables uvicorn's
    # auto-reload — NOTE(review): both look like development settings; confirm
    # this path is not used in production.
    uvicorn.run("main:app", host="0.0.0.0", port=8005, reload=True, log_config=None)
|