Files
ai-tax-agent/apps/svc_firm_connectors/main.py
harkon b324ff09ef
Some checks failed
CI/CD Pipeline / Code Quality & Linting (push) Has been cancelled
CI/CD Pipeline / Policy Validation (push) Has been cancelled
CI/CD Pipeline / Test Suite (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-firm-connectors) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-forms) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-hmrc) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ingestion) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-normalize-map) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-ocr) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-indexer) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-reason) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (svc-rpa) (push) Has been cancelled
CI/CD Pipeline / Build Docker Images (ui-review) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-coverage) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-extract) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-kg) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (svc-rag-retriever) (push) Has been cancelled
CI/CD Pipeline / Security Scanning (ui-review) (push) Has been cancelled
CI/CD Pipeline / Generate SBOM (push) Has been cancelled
CI/CD Pipeline / Deploy to Staging (push) Has been cancelled
CI/CD Pipeline / Deploy to Production (push) Has been cancelled
CI/CD Pipeline / Notifications (push) Has been cancelled
Initial commit
2025-10-11 08:41:36 +01:00

763 lines
24 KiB
Python

# FILE: apps/svc-firm-connectors/main.py
# mypy: disable-error-code=union-attr
# Firm database integration with practice management systems
import asyncio
import json
import os
# `sys` (with `os`) is used below to put the repository root on sys.path so the shared `libs` package can be imported
import sys
from datetime import datetime
from typing import Any
import structlog
import ulid
from fastapi import BackgroundTasks, Depends, HTTPException, Request
from fastapi.responses import JSONResponse
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from libs.app_factory import create_app
from libs.config import (
BaseAppSettings,
create_event_bus,
create_neo4j_client,
create_vault_client,
)
from libs.events import EventBus, EventPayload, EventTopics
from libs.neo import Neo4jClient
from libs.observability import get_metrics, get_tracer, setup_observability
from libs.schemas import ErrorResponse, FirmSyncRequest, FirmSyncResponse
from libs.security import VaultTransitHelper, get_current_user, get_tenant_id
logger = structlog.get_logger()
class FirmConnectorsSettings(BaseAppSettings):
    """Configuration for the firm connectors service.

    Adds the connector-specific options declared below on top of the shared
    ``BaseAppSettings``.
    """

    service_name: str = "svc-firm-connectors"

    # Practice management systems accepted by this service's endpoints
    supported_systems: list[str] = [
        "iris", "sage", "xero", "quickbooks", "freeagent", "kashflow",
    ]

    # Sync configuration
    sync_batch_size: int = 100
    max_sync_retries: int = 3
    sync_timeout: int = 5 * 60  # 300, i.e. 5 minutes

    # Rate limiting (requests per minute)
    api_rate_limit: int = 100

    # Data mapping
    field_mappings_dir: str = "config/firm_mappings"
# Build the application object together with its settings instance.
app, settings = create_app(
    service_name="svc-firm-connectors",
    title="Tax Agent Firm Connectors Service",
    description="Practice management system integration",
    settings_class=FirmConnectorsSettings,
)

# Observability handles used by the endpoints and background tasks below.
tracer = get_tracer("svc-firm-connectors")
metrics = get_metrics()

# Module-level clients; each stays ``None`` until ``startup_event`` assigns it.
vault_helper: VaultTransitHelper | None = None
neo4j_client: Neo4jClient | None = None
event_bus: EventBus | None = None
@app.on_event("startup")
async def startup_event() -> None:
    """Set up observability and create the Vault, Neo4j and event-bus clients.

    The created clients are stored in the module-level globals; the event bus
    is started before the function returns.
    """
    global vault_helper, neo4j_client, event_bus

    logger.info("Starting firm connectors service")

    # Observability first, then the clients in the same order as before.
    setup_observability(settings)

    # Vault transit helper
    vault_helper = VaultTransitHelper(
        create_vault_client(settings), "tax-agent-transit"
    )

    # Neo4j client wrapping the driver built from settings
    neo4j_client = Neo4jClient(create_neo4j_client(settings))

    # Event bus: publish it through the global, then start it
    bus = create_event_bus(settings)
    event_bus = bus
    await bus.start()  # pyright: ignore[reportOptionalMemberAccess]

    logger.info("Firm connectors service started successfully")
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """Close the Neo4j client and stop the event bus on shutdown.

    Clients that were never initialised (still ``None``) are skipped. The
    event bus is stopped in a ``finally`` clause so that an error raised
    while closing Neo4j cannot leave the bus running; that error still
    propagates to the caller once the bus has been stopped.
    """
    # No ``global`` statement is needed: the module-level clients are only
    # read here, never rebound.
    logger.info("Shutting down firm connectors service")

    try:
        if neo4j_client:
            await neo4j_client.close()
    finally:
        if event_bus:
            await event_bus.stop()

    logger.info("Firm connectors service shutdown complete")
@app.get("/health")
async def health_check() -> dict[str, Any]:
    """Return a fixed "healthy" status together with service metadata.

    The payload carries the service name and version from ``settings``, the
    current ``datetime.utcnow()`` value in ISO format and the list of
    supported systems.
    """
    report: dict[str, Any] = dict(
        status="healthy",
        service=settings.service_name,
        version=settings.service_version,
        timestamp=datetime.utcnow().isoformat(),
        supported_systems=settings.supported_systems,
    )
    return report
@app.post("/sync", response_model=FirmSyncResponse)
async def sync_firm_data(
    request_data: FirmSyncRequest,
    background_tasks: BackgroundTasks,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> FirmSyncResponse:
    """Schedule a sync from a practice management system.

    The requested system is validated, a new sync ID is generated and the
    actual work is handed to ``_sync_firm_data_async`` as a background task.
    The response is returned immediately with status ``"syncing"``.

    Raises:
        HTTPException: 400 when the system is not in
            ``settings.supported_systems``; 500 when scheduling fails for
            any other reason.
    """
    with tracer.start_as_current_span("sync_firm_data") as span:
        system = request_data.system
        sync_type = request_data.sync_type

        span.set_attribute("system", system)
        span.set_attribute("tenant_id", tenant_id)
        span.set_attribute("sync_type", sync_type)

        try:
            # Reject systems this service does not know about.
            if system not in settings.supported_systems:
                raise HTTPException(
                    status_code=400, detail=f"Unsupported system: {system}"
                )

            # Fresh identifier for this sync run
            sync_id = str(ulid.new())
            span.set_attribute("sync_id", sync_id)

            # Hand the work over to the background task runner.
            actor = current_user.get("sub", "system")
            background_tasks.add_task(
                _sync_firm_data_async,
                system,
                sync_type,
                request_data.connection_config,
                tenant_id,
                sync_id,
                actor,
            )

            logger.info(
                "Firm data sync started",
                sync_id=sync_id,
                system=system,
                sync_type=sync_type,
            )

            response = FirmSyncResponse(
                firm_id=request_data.firm_id,
                status="syncing",
                message=f"Sync started with ID: {sync_id}",
                synced_entities=0,
                errors=[],
            )
            return response

        except HTTPException:
            # Deliberate HTTP errors (e.g. the 400 above) pass through as-is.
            raise
        except Exception as exc:
            logger.error("Failed to start firm sync", error=str(exc))
            raise HTTPException(status_code=500, detail="Failed to start firm sync")
@app.get("/sync/{sync_id}")
async def get_sync_status(
    sync_id: str,
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Look up a ``FirmSync`` record and return its status fields.

    The record is matched on both ``sync_id`` and ``tenant_id`` and must not
    have ``retracted_at`` set.

    Raises:
        HTTPException: 404 when no matching record exists; 500 when the
            lookup or decoding of the record fails.
    """
    with tracer.start_as_current_span("get_sync_status") as span:
        span.set_attribute("sync_id", sync_id)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Fetch the sync record from Neo4j.
            cypher = (
                "\n"
                "MATCH (s:FirmSync {sync_id: $sync_id, tenant_id: $tenant_id})\n"
                "WHERE s.retracted_at IS NULL\n"
                "RETURN s\n"
            )
            params = {"sync_id": sync_id, "tenant_id": tenant_id}
            rows = await neo4j_client.run_query(  # pyright: ignore[reportOptionalMemberAccess]
                cypher, params
            )

            if not rows:
                raise HTTPException(status_code=404, detail="Sync not found")

            record = rows[0]["s"]

            # Copy the plain fields over, applying the same defaults as the
            # record lookup always used (0 for the counters, None otherwise).
            status_payload: dict[str, Any] = {"sync_id": sync_id}
            plain_fields = (
                ("system", None),
                ("status", None),
                ("records_synced", 0),
                ("total_records", 0),
                ("started_at", None),
                ("completed_at", None),
            )
            for field_name, fallback in plain_fields:
                status_payload[field_name] = record.get(field_name, fallback)

            # Errors are stored as a JSON-encoded string on the record.
            status_payload["errors"] = json.loads(record.get("errors", "[]"))
            return status_payload

        except HTTPException:
            raise
        except Exception as exc:
            logger.error("Failed to get sync status", sync_id=sync_id, error=str(exc))
            raise HTTPException(status_code=500, detail="Failed to get sync status")
@app.post("/connections/{system}/test")
async def test_connection(
    system: str,
    connection_config: dict[str, Any],
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Run the connection test for ``system`` and report its outcome.

    Raises:
        HTTPException: 400 when the system is unsupported or has no
            connection test; 500 (including the error text) when the test
            itself fails.
    """
    with tracer.start_as_current_span("test_connection") as span:
        span.set_attribute("system", system)
        span.set_attribute("tenant_id", tenant_id)

        try:
            # Validate system
            if system not in settings.supported_systems:
                raise HTTPException(
                    status_code=400, detail=f"Unsupported system: {system}"
                )

            # One tester coroutine per known system
            testers = {
                "iris": _test_iris_connection,
                "sage": _test_sage_connection,
                "xero": _test_xero_connection,
                "quickbooks": _test_quickbooks_connection,
                "freeagent": _test_freeagent_connection,
                "kashflow": _test_kashflow_connection,
            }
            tester = testers.get(system)
            if tester is None:
                raise HTTPException(
                    status_code=400,
                    detail=f"Connection test not implemented for {system}",
                )

            outcome = await tester(connection_config)

            return {
                "system": system,
                "connection_status": outcome["status"],
                "message": outcome["message"],
                "capabilities": outcome.get("capabilities", []),
                "test_timestamp": datetime.utcnow().isoformat(),
            }

        except HTTPException:
            raise
        except Exception as exc:
            logger.error("Connection test failed", system=system, error=str(exc))
            raise HTTPException(
                status_code=500, detail=f"Connection test failed: {str(exc)}"
            )
@app.get("/systems")
async def list_supported_systems(
    current_user: dict[str, Any] = Depends(get_current_user),
    tenant_id: str = Depends(get_tenant_id),
) -> dict[str, Any]:
    """Describe every supported practice management system.

    Each entry carries the system key, its display name, its capabilities
    and the connection fields it needs.

    Raises:
        HTTPException: 500 when building the listing fails.
    """
    try:
        catalogue: list[Any] = [
            {
                "system": key,
                "name": _get_system_name(key),
                "capabilities": _get_system_capabilities(key),
                "connection_fields": _get_connection_fields(key),
            }
            for key in settings.supported_systems
        ]
        return {"supported_systems": catalogue, "total_systems": len(catalogue)}

    except Exception as exc:
        logger.error("Failed to list systems", error=str(exc))
        raise HTTPException(status_code=500, detail="Failed to list systems")
async def _sync_firm_data_async(
    system: str,
    sync_type: str,
    connection_config: dict[str, Any],
    tenant_id: str,
    sync_id: str,
    actor: str,
) -> None:
    """Run one firm-data sync and record its outcome.

    Steps: create the sync record, run the system-specific sync routine,
    mark the record ``"completed"``, update metrics and publish
    ``EventTopics.FIRM_SYNC_COMPLETED``.

    Any exception raised along the way is logged, reported through
    ``_update_sync_record`` with status ``"error"`` and counted in
    ``firm_sync_errors_total``; it is not re-raised.

    Args:
        system: Key of the practice management system to sync from.
        sync_type: Sync type forwarded to the system-specific routine.
        connection_config: Connection settings forwarded to that routine.
        tenant_id: Tenant the sync belongs to.
        sync_id: Identifier of this sync run.
        actor: Actor recorded on the completion event.
    """
    with tracer.start_as_current_span("sync_firm_data_async") as span:
        span.set_attribute("sync_id", sync_id)
        span.set_attribute("system", system)
        span.set_attribute("sync_type", sync_type)

        try:
            # Create sync record
            await _create_sync_record(sync_id, system, sync_type, tenant_id)

            # Pick the system-specific sync routine
            sync_handlers = {
                "iris": _sync_iris_data,
                "sage": _sync_sage_data,
                "xero": _sync_xero_data,
                "quickbooks": _sync_quickbooks_data,
                "freeagent": _sync_freeagent_data,
                "kashflow": _sync_kashflow_data,
            }
            handler = sync_handlers.get(system)
            if handler is None:
                raise Exception(f"Sync not implemented for {system}")

            sync_result = await handler(connection_config, sync_type, tenant_id)

            # Update sync record
            await _update_sync_record(sync_id, "completed", sync_result)

            # Update metrics
            metrics.counter("firm_syncs_completed_total").labels(
                tenant_id=tenant_id, system=system, sync_type=sync_type
            ).inc()

            metrics.histogram("sync_records_count").labels(
                system=system, sync_type=sync_type
            ).observe(sync_result["records_synced"])

            # Publish completion event
            event_payload = EventPayload(
                data={
                    "sync_id": sync_id,
                    "system": system,
                    "sync_type": sync_type,
                    "tenant_id": tenant_id,
                    "records_synced": sync_result["records_synced"],
                    "entities_created": sync_result.get("entities_created", 0),
                },
                actor=actor,
                tenant_id=tenant_id,
            )

            await event_bus.publish(EventTopics.FIRM_SYNC_COMPLETED, event_payload)  # type: ignore

            logger.info(
                "Firm sync completed",
                sync_id=sync_id,
                system=system,
                records=sync_result["records_synced"],
            )

        except Exception as e:
            logger.error("Firm sync failed", sync_id=sync_id, error=str(e))

            # `_update_sync_record` serialises the list stored under the
            # "errors" key; a payload using any other key would be ignored
            # and the failure reason would be lost.
            await _update_sync_record(sync_id, "error", {"errors": [str(e)]})

            # Update error metrics
            metrics.counter("firm_sync_errors_total").labels(
                tenant_id=tenant_id, system=system, error_type=type(e).__name__
            ).inc()
async def _test_iris_connection(config: dict[str, Any]) -> dict[str, Any]:
"""Test IRIS connection"""
# Mock implementation
await asyncio.sleep(1)
return {
"status": "success",
"message": "Connection successful",
"capabilities": ["clients", "jobs", "documents"],
}
async def _test_sage_connection(config: dict[str, Any]) -> dict[str, Any]:
"""Test Sage connection"""
# Mock implementation
await asyncio.sleep(1)
return {
"status": "success",
"message": "Connection successful",
"capabilities": ["customers", "suppliers", "transactions"],
}
async def _test_xero_connection(config: dict[str, Any]) -> dict[str, Any]:
"""Test Xero connection"""
# Mock implementation
await asyncio.sleep(1)
return {
"status": "success",
"message": "Connection successful",
"capabilities": ["contacts", "invoices", "bank_transactions"],
}
async def _test_quickbooks_connection(config: dict[str, Any]) -> dict[str, Any]:
"""Test QuickBooks connection"""
# Mock implementation
await asyncio.sleep(1)
return {
"status": "success",
"message": "Connection successful",
"capabilities": ["customers", "vendors", "items", "transactions"],
}
async def _test_freeagent_connection(config: dict[str, Any]) -> dict[str, Any]:
"""Test FreeAgent connection"""
# Mock implementation
await asyncio.sleep(1)
return {
"status": "success",
"message": "Connection successful",
"capabilities": ["contacts", "projects", "invoices", "expenses"],
}
async def _test_kashflow_connection(config: dict[str, Any]) -> dict[str, Any]:
"""Test KashFlow connection"""
# Mock implementation
await asyncio.sleep(1)
return {
"status": "success",
"message": "Connection successful",
"capabilities": ["customers", "suppliers", "invoices", "receipts"],
}
async def _sync_iris_data(
    config: dict[str, Any], sync_type: str, tenant_id: str
) -> dict[str, Any]:
    """Sync client data from IRIS into ``TaxpayerProfile`` nodes.

    Mock implementation: ``config`` is not inspected. After a two-second
    wait, a fixed list of two clients is written through
    ``neo4j_client.create_node``.

    Returns:
        A dict with ``records_synced``, ``entities_created`` and the
        ``sync_type`` that was passed in.
    """
    await asyncio.sleep(2)  # simulated fetch latency

    # Simulated client data as (id, name, utr) triples
    iris_clients = (
        ("client_1", "John Doe", "1234567890"),
        ("client_2", "Jane Smith", "0987654321"),
    )

    created = 0
    for client_id, client_name, client_utr in iris_clients:
        # One taxpayer profile node per client
        profile = {
            "taxpayer_id": client_id,
            "name": client_name,
            "utr": client_utr,
            "tenant_id": tenant_id,
            "source": "iris_sync",
            "extractor_version": "1.0.0",
            "valid_from": datetime.utcnow(),
            "asserted_at": datetime.utcnow(),
        }
        await neo4j_client.create_node("TaxpayerProfile", profile)  # pyright: ignore[reportOptionalMemberAccess]
        created += 1

    return {
        "records_synced": len(iris_clients),
        "entities_created": created,
        "sync_type": sync_type,
    }
async def _sync_sage_data(
config: dict[str, Any], sync_type: str, tenant_id: str
) -> dict[str, Any]:
"""Sync data from Sage"""
# Mock implementation
await asyncio.sleep(2)
return {"records_synced": 5, "entities_created": 5, "sync_type": sync_type}
async def _sync_xero_data(
config: dict[str, Any], sync_type: str, tenant_id: str
) -> dict[str, Any]:
"""Sync data from Xero"""
# Mock implementation
await asyncio.sleep(2)
return {"records_synced": 8, "entities_created": 8, "sync_type": sync_type}
async def _sync_quickbooks_data(
config: dict[str, Any], sync_type: str, tenant_id: str
) -> dict[str, Any]:
"""Sync data from QuickBooks"""
# Mock implementation
await asyncio.sleep(2)
return {"records_synced": 12, "entities_created": 12, "sync_type": sync_type}
async def _sync_freeagent_data(
config: dict[str, Any], sync_type: str, tenant_id: str
) -> dict[str, Any]:
"""Sync data from FreeAgent"""
# Mock implementation
await asyncio.sleep(2)
return {"records_synced": 6, "entities_created": 6, "sync_type": sync_type}
async def _sync_kashflow_data(
config: dict[str, Any], sync_type: str, tenant_id: str
) -> dict[str, Any]:
"""Sync data from KashFlow"""
# Mock implementation
await asyncio.sleep(2)
return {"records_synced": 4, "entities_created": 4, "sync_type": sync_type}
def _get_system_name(system: str) -> str:
"""Get human-readable system name"""
names = {
"iris": "IRIS Practice Management",
"sage": "Sage Practice Management",
"xero": "Xero",
"quickbooks": "QuickBooks",
"freeagent": "FreeAgent",
"kashflow": "KashFlow",
}
return names.get(system, system.title())
def _get_system_capabilities(system: str) -> list[str]:
"""Get system capabilities"""
capabilities = {
"iris": ["clients", "jobs", "documents", "time_tracking"],
"sage": ["customers", "suppliers", "transactions", "reports"],
"xero": ["contacts", "invoices", "bank_transactions", "reports"],
"quickbooks": ["customers", "vendors", "items", "transactions", "reports"],
"freeagent": ["contacts", "projects", "invoices", "expenses", "time_tracking"],
"kashflow": ["customers", "suppliers", "invoices", "receipts", "reports"],
}
return capabilities.get(system, [])
def _get_connection_fields(system: str) -> list[dict[str, Any]]:
"""Get required connection fields for system"""
fields = {
"iris": [
{
"name": "api_key",
"type": "string",
"required": True,
"description": "IRIS API Key",
},
{
"name": "base_url",
"type": "string",
"required": True,
"description": "IRIS Base URL",
},
],
"sage": [
{
"name": "username",
"type": "string",
"required": True,
"description": "Sage Username",
},
{
"name": "password",
"type": "password",
"required": True,
"description": "Sage Password",
},
{
"name": "database",
"type": "string",
"required": True,
"description": "Database Name",
},
],
"xero": [
{
"name": "client_id",
"type": "string",
"required": True,
"description": "Xero Client ID",
},
{
"name": "client_secret",
"type": "password",
"required": True,
"description": "Xero Client Secret",
},
{
"name": "tenant_id",
"type": "string",
"required": True,
"description": "Xero Tenant ID",
},
],
"quickbooks": [
{
"name": "client_id",
"type": "string",
"required": True,
"description": "QuickBooks Client ID",
},
{
"name": "client_secret",
"type": "password",
"required": True,
"description": "QuickBooks Client Secret",
},
{
"name": "company_id",
"type": "string",
"required": True,
"description": "Company ID",
},
],
"freeagent": [
{
"name": "client_id",
"type": "string",
"required": True,
"description": "FreeAgent Client ID",
},
{
"name": "client_secret",
"type": "password",
"required": True,
"description": "FreeAgent Client Secret",
},
],
"kashflow": [
{
"name": "username",
"type": "string",
"required": True,
"description": "KashFlow Username",
},
{
"name": "password",
"type": "password",
"required": True,
"description": "KashFlow Password",
},
],
}
return fields.get(system, [])
async def _create_sync_record(
    sync_id: str, system: str, sync_type: str, tenant_id: str
) -> None:
    """Create the ``FirmSync`` node for a sync that is starting.

    The node starts with status ``"running"``, zero synced records and an
    empty JSON list of errors.
    """
    # Identity of the sync run
    identity = {
        "sync_id": sync_id,
        "system": system,
        "sync_type": sync_type,
        "tenant_id": tenant_id,
    }

    # Initial state plus provenance fields
    node_properties = {
        **identity,
        "status": "running",
        "started_at": datetime.utcnow().isoformat(),
        "records_synced": 0,
        "errors": "[]",
        "source": "firm_connectors",
        "extractor_version": "1.0.0",
        "valid_from": datetime.utcnow(),
        "asserted_at": datetime.utcnow(),
    }

    await neo4j_client.create_node("FirmSync", node_properties)  # pyright: ignore[reportOptionalMemberAccess]
async def _update_sync_record(
    sync_id: str, status: str, result: dict[str, Any]
) -> None:
    """Assemble the update for a sync record and log it.

    Only ``records_synced``, ``total_records`` and ``errors`` are read from
    ``result``; missing keys default to ``0``, ``0`` and ``[]``.

    NOTE: nothing is persisted here yet. The assembled properties are only
    emitted as a debug log entry.
    """
    finished_at = datetime.utcnow().isoformat()
    synced = result.get("records_synced", 0)
    total = result.get("total_records", 0)
    errors_json = json.dumps(result.get("errors", []))

    changes = {
        "status": status,
        "completed_at": finished_at,
        "records_synced": synced,
        "total_records": total,
        "errors": errors_json,
    }

    # This would update the existing node; for now, just log.
    logger.debug(
        "Sync record updated",
        sync_id=sync_id,
        status=status,
        properties=changes,
    )
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Render an ``HTTPException`` as an RFC7807-style JSON error body.

    The exception's status code is used both for the response and inside the
    body; ``exc.detail`` fills both ``title`` and ``detail``, and the request
    URL is reported as ``instance``.

    Any headers attached to the exception (for example ``WWW-Authenticate``
    on a 401) are forwarded on the response instead of being dropped.
    """
    problem = ErrorResponse(
        type=f"https://httpstatuses.com/{exc.status_code}",
        title=exc.detail,
        status=exc.status_code,
        detail=exc.detail,
        instance=str(request.url),
        trace_id="",
    )
    return JSONResponse(
        status_code=exc.status_code,
        content=problem.model_dump(),
        # ``headers`` is None when the exception carries none.
        headers=getattr(exc, "headers", None),
    )
if __name__ == "__main__":
    import uvicorn

    # Options for running this module directly as a script.
    server_options = {
        "host": "0.0.0.0",
        "port": 8011,
        "reload": True,
        "log_config": None,
    }
    uvicorn.run("main:app", **server_options)